This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
UCNs in identifiers

From: gkeating at geoffk5 dot apple dot com (Geoffrey Keating)
To: gcc-patches at gcc dot gnu dot org
Date: Fri, 11 Mar 2005 00:29:27 -0800 (PST)
Subject: UCNs in identifiers
This patch improves on my previous patch.  In particular, it adds
many testcases.

The patch doesn't fully implement UCNs-in-IDs.  The missing piece is
stringification for C99, which, as I read the C standard, requires you
to keep track of the original spelling of the token.  The patched
compiler produces an error in this case, just as the current compiler
does for any use of UCNs-in-IDs.

I'll wait a day or so (I guess until Friday night) in case there are
any glaring mistakes; but I'm pretty confident that this is an
incremental step forwards to full support.

Bootstrapped & tested on powerpc-darwin8 for C and C++ (Java doesn't
build due to 64-bit changes, but it doesn't use cpplib anyway).

-- 
- Geoffrey Keating <geoffk@apple.com>

===File ~/patches/gcc-ucns-2.patch==========================
Index: libcpp/ChangeLog
2005-03-10  Geoffrey Keating  <geoffk@apple.com>

	* directives.c (glue_header_name): Update call to cpp_spell_token.
	* internal.h (_cpp_interpret_identifier): New.
	* charset.c (_cpp_interpret_identifier): New.
	* lex.c (lex_identifier): Add extra parameter to indicate if initial
	character was '$' or '\'.  Support identifiers with UCNs.
	(forms_identifier_p): Allow UCNs.
	(_cpp_lex_direct): Pass extra parameter to lex_identifier.
	(utf8_to_ucn): New.
	(cpp_spell_token): Add FORSTRING parameter.  Use it.
	(cpp_token_as_text): Update call to cpp_spell_token.
	(cpp_output_token): Write UCNs back out.
	(stringify_arg): Update call to cpp_spell_token.
	(paste_tokens): Update call to cpp_spell_token.
	(cpp_macro_definition): Update call to cpp_spell_token.
	* include/cpplib.h: Add parameter to cpp_spell_token.

Index: gcc/ChangeLog
2005-03-10  Geoffrey Keating  <geoffk@apple.com>

	* c-lex.c (c_lex_with_flags): Add parameter to call to 
	cpp_spell_token.

Index: gcc/testsuite/ChangeLog
2005-03-10  Geoffrey Keating  <geoffk@apple.com>

	* gcc.dg/ucnid-1.c: New.
	* gcc.dg/ucnid-2.c: New.
	* gcc.dg/ucnid-3.c: New.
	* gcc.dg/ucnid-4.c: New.
	* gcc.dg/ucnid-5.c: New.
	* gcc.dg/ucnid-6.c: New.
	* gcc.dg/cpp/ucnid-1.c: New.
	* gcc.dg/cpp/ucnid-2.c: New.
	* gcc.dg/cpp/ucnid-3.c: New.
	* g++.dg/other/ucnid-1.C: New.
	* g++.dg/cpp/ucnid-1.C: New.

Index: libcpp/charset.c
===================================================================
RCS file: /cvs/gcc/gcc/libcpp/charset.c,v
retrieving revision 1.5
diff -u -p -u -p -r1.5 charset.c
--- libcpp/charset.c	20 Feb 2005 17:01:31 -0000	1.5
+++ libcpp/charset.c	11 Mar 2005 01:53:54 -0000
@@ -1414,7 +1414,60 @@ cpp_interpret_charconst (cpp_reader *pfi
 
   return result;
 }
+
+/* Convert an identifier denoted by ID and LEN, which might contain
+   UCN escapes, to the source character set, either UTF-8 or
+   UTF-EBCDIC.  Assumes that the identifier is actually a valid identifier.  */
+cpp_hashnode *
+_cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
+{
+  /* It turns out that a UCN escape always turns into fewer characters
+     than the escape itself, so we can allocate a temporary in advance.  */
+  uchar * buf = alloca (len + 1);
+  uchar * bufp = buf;
+  size_t idp;
+  
+  for (idp = 0; idp < len; idp++)
+    if (id[idp] != '\\')
+      *bufp++ = id[idp];
+    else
+      {
+	unsigned length = id[idp+1] == 'u' ? 4 : 8;
+	cppchar_t value = 0;
+	size_t bufleft = len - (bufp - buf);
+	int rval;
+
+	idp += 2;
+	while (length && idp < len && ISXDIGIT (id[idp]))
+	  {
+	    value = (value << 4) + hex_value (id[idp]);
+	    idp++;
+	    length--;
+	  }
+	idp--;
+
+	/* Special case for EBCDIC: if the identifier contains
+	   a '$' specified using a UCN, translate it to EBCDIC.  */
+	if (value == 0x24)
+	  {
+	    *bufp++ = '$';
+	    continue;
+	  }
+
+	rval = one_cppchar_to_utf8 (value, &bufp, &bufleft);
+	if (rval)
+	  {
+	    errno = rval;
+	    cpp_errno (pfile, CPP_DL_ERROR,
+		       "converting UCN to source character set");
+	    break;
+	  }
+      }
 
+  return CPP_HASHNODE (ht_lookup (pfile->hash_table, 
+				  buf, bufp - buf, HT_ALLOC));
+}
+
 /* Convert an input buffer (containing the complete contents of one
    source file) from INPUT_CHARSET to the source character set.  INPUT
    points to the input buffer, SIZE is its allocated size, and LEN is
Index: libcpp/directives.c
===================================================================
RCS file: /cvs/gcc/gcc/libcpp/directives.c,v
retrieving revision 1.13
diff -u -p -u -p -r1.13 directives.c
--- libcpp/directives.c	28 Feb 2005 19:04:19 -0000	1.13
+++ libcpp/directives.c	11 Mar 2005 01:53:55 -0000
@@ -608,7 +608,8 @@ glue_header_name (cpp_reader *pfile)
       if (token->flags & PREV_WHITE)
 	buffer[total_len++] = ' ';
 
-      total_len = (cpp_spell_token (pfile, token, (uchar *) &buffer[total_len])
+      total_len = (cpp_spell_token (pfile, token, (uchar *) &buffer[total_len],
+				    true)
 		   - (uchar *) buffer);
     }
 
Index: libcpp/internal.h
===================================================================
RCS file: /cvs/gcc/gcc/libcpp/internal.h,v
retrieving revision 1.13
diff -u -p -u -p -r1.13 internal.h
--- libcpp/internal.h	14 Feb 2005 14:43:56 -0000	1.13
+++ libcpp/internal.h	11 Mar 2005 01:53:55 -0000
@@ -571,6 +571,9 @@ extern unsigned char *_cpp_convert_input
 					  unsigned char *, size_t, size_t,
 					  off_t *);
 extern const char *_cpp_default_encoding (void);
+extern cpp_hashnode * _cpp_interpret_identifier (cpp_reader *pfile,
+						 const unsigned char *id,
+						 size_t len);
 
 /* Utility routines and macros.  */
 #define DSC(str) (const unsigned char *)str, sizeof str - 1
Index: libcpp/lex.c
===================================================================
RCS file: /cvs/gcc/gcc/libcpp/lex.c,v
retrieving revision 1.5
diff -u -p -u -p -r1.5 lex.c
--- libcpp/lex.c	9 Sep 2004 19:16:55 -0000	1.5
+++ libcpp/lex.c	11 Mar 2005 01:53:55 -0000
@@ -53,7 +53,7 @@ static const struct token_spelling token
 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
 static int skip_line_comment (cpp_reader *);
 static void skip_whitespace (cpp_reader *, cppchar_t);
-static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
+static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *, bool);
 static void lex_number (cpp_reader *, cpp_string *);
 static bool forms_identifier_p (cpp_reader *, int);
 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
@@ -453,7 +453,7 @@ forms_identifier_p (cpp_reader *pfile, i
     }
 
   /* Is this a syntactically valid UCN?  */
-  if (0 && *buffer->cur == '\\'
+  if (*buffer->cur == '\\'
       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
     {
       buffer->cur += 2;
@@ -467,39 +467,39 @@ forms_identifier_p (cpp_reader *pfile, i
 
 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 static cpp_hashnode *
-lex_identifier (cpp_reader *pfile, const uchar *base)
+lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
 {
   cpp_hashnode *result;
-  const uchar *cur, *limit;
+  const uchar *cur;
   unsigned int len;
   unsigned int hash = HT_HASHSTEP (0, *base);
 
   cur = pfile->buffer->cur;
-  for (;;)
+  if (! starts_ucn)
+    while (ISIDNUM (*cur))
+      {
+	hash = HT_HASHSTEP (hash, *cur);
+	cur++;
+      }
+  pfile->buffer->cur = cur;
+  if (starts_ucn || forms_identifier_p (pfile, false))
     {
-      /* N.B. ISIDNUM does not include $.  */
-      while (ISIDNUM (*cur))
-	{
-	  hash = HT_HASHSTEP (hash, *cur);
-	  cur++;
-	}
-
-      pfile->buffer->cur = cur;
-      if (!forms_identifier_p (pfile, false))
-	break;
-
-      limit = pfile->buffer->cur;
-      while (cur < limit)
-	{
-	  hash = HT_HASHSTEP (hash, *cur);
-	  cur++;
-	}
+      /* Slower version for identifiers containing UCNs (or $).  */
+      do {
+	while (ISIDNUM (*pfile->buffer->cur))
+	  pfile->buffer->cur++;
+      } while (forms_identifier_p (pfile, false));
+      result = _cpp_interpret_identifier (pfile, base,
+					  pfile->buffer->cur - base);
     }
-  len = cur - base;
-  hash = HT_HASHFINISH (hash, len);
+  else
+    {
+      len = cur - base;
+      hash = HT_HASHFINISH (hash, len);
 
-  result = (cpp_hashnode *)
-    ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
+      result = (cpp_hashnode *)
+	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
+    }
 
   /* Rarely, identifiers require diagnostics when lexed.  */
   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
@@ -922,7 +922,7 @@ _cpp_lex_direct (cpp_reader *pfile)
     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
     case 'Y': case 'Z':
       result->type = CPP_NAME;
-      result->val.node = lex_identifier (pfile, buffer->cur - 1);
+      result->val.node = lex_identifier (pfile, buffer->cur - 1, false);
 
       /* Convert named operators to their proper types.  */
       if (result->val.node->flags & NODE_OPERATOR)
@@ -1155,7 +1155,7 @@ _cpp_lex_direct (cpp_reader *pfile)
 	if (forms_identifier_p (pfile, true))
 	  {
 	    result->type = CPP_NAME;
-	    result->val.node = lex_identifier (pfile, base);
+	    result->val.node = lex_identifier (pfile, base, true);
 	    break;
 	  }
 	buffer->cur++;
@@ -1180,19 +1180,56 @@ cpp_token_len (const cpp_token *token)
     {
     default:		len = 4;				break;
     case SPELL_LITERAL:	len = token->val.str.len;		break;
-    case SPELL_IDENT:	len = NODE_LEN (token->val.node);	break;
+    case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
     }
 
   return len;
 }
 
+/* Parse UTF-8 out of NAME and place a \U escape in BUFFER.
+   Return the number of bytes read out of NAME.  (There are always
+   10 bytes written to BUFFER.)  */
+
+static size_t
+utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
+{
+  int j;
+  int ucn_len = 0;
+  int ucn_len_c;
+  unsigned t;
+  unsigned long utf32;
+  
+  /* Compute the length of the UTF-8 sequence.  */
+  for (t = *name; t & 0x80; t <<= 1)
+    ucn_len++;
+  
+  utf32 = *name & (0x7F >> ucn_len);
+  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
+    {
+      utf32 = (utf32 << 6) | (*++name & 0x3F);
+      
+      /* Ill-formed UTF-8.  */
+      if ((*name & ~0x3F) != 0x80)
+	abort ();
+    }
+  
+  *buffer++ = '\\';
+  *buffer++ = 'U';
+  for (j = 7; j >= 0; j--)
+    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
+  return ucn_len;
+}
+
+
 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
    already contain the enough space to hold the token's spelling.
    Returns a pointer to the character after the last character written.
+   FORSTRING is true if this is to be the spelling after translation
+   phase 1 (this is different for UCNs).
    FIXME: Would be nice if we didn't need the PFILE argument.  */
 unsigned char *
 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
-		 unsigned char *buffer)
+		 unsigned char *buffer, bool forstring)
 {
   switch (TOKEN_SPELL (token))
     {
@@ -1216,8 +1253,26 @@ cpp_spell_token (cpp_reader *pfile, cons
 
     spell_ident:
     case SPELL_IDENT:
-      memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
-      buffer += NODE_LEN (token->val.node);
+      if (forstring)
+	{
+	  memcpy (buffer, NODE_NAME (token->val.node),
+		  NODE_LEN (token->val.node));
+	  buffer += NODE_LEN (token->val.node);
+	}
+      else
+	{
+	  size_t i;
+	  const unsigned char * name = NODE_NAME (token->val.node);
+	  
+	  for (i = 0; i < NODE_LEN (token->val.node); i++)
+	    if (name[i] & ~0x7F)
+	      {
+		i += utf8_to_ucn (buffer, name + i) - 1;
+		buffer += 10;
+	      }
+	    else
+	      *buffer++ = NODE_NAME (token->val.node)[i];
+	}
       break;
 
     case SPELL_LITERAL:
@@ -1242,7 +1297,7 @@ cpp_token_as_text (cpp_reader *pfile, co
   unsigned int len = cpp_token_len (token) + 1;
   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
 
-  end = cpp_spell_token (pfile, token, start);
+  end = cpp_spell_token (pfile, token, start, false);
   end[0] = '\0';
 
   return start;
@@ -1286,8 +1341,21 @@ cpp_output_token (const cpp_token *token
 
     spell_ident:
     case SPELL_IDENT:
-      fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
-    break;
+      {
+	size_t i;
+	const unsigned char * name = NODE_NAME (token->val.node);
+	
+	for (i = 0; i < NODE_LEN (token->val.node); i++)
+	  if (name[i] & ~0x7F)
+	    {
+	      unsigned char buffer[10];
+	      i += utf8_to_ucn (buffer, name + i) - 1;
+	      fwrite (buffer, 1, 10, fp);
+	    }
+	  else
+	    fputc (NODE_NAME (token->val.node)[i], fp);
+      }
+      break;
 
     case SPELL_LITERAL:
       fwrite (token->val.str.text, 1, token->val.str.len, fp);
Index: libcpp/macro.c
===================================================================
RCS file: /cvs/gcc/gcc/libcpp/macro.c,v
retrieving revision 1.10
diff -u -p -u -p -r1.10 macro.c
--- libcpp/macro.c	4 Mar 2005 15:33:23 -0000	1.10
+++ libcpp/macro.c	11 Mar 2005 01:53:55 -0000
@@ -352,6 +352,19 @@ stringify_arg (cpp_reader *pfile, macro_
       escape_it = (token->type == CPP_STRING || token->type == CPP_WSTRING
 		   || token->type == CPP_CHAR || token->type == CPP_WCHAR);
 
+      if (! CPP_OPTION (pfile, cplusplus) && token->type == CPP_NAME)
+	{
+	  size_t s;
+	  for (s = 0; s < NODE_LEN (token->val.node); s++)
+	    if (NODE_NAME (token->val.node)[s] & ~0x7F)
+	      {
+		cpp_error (pfile, CPP_DL_ERROR,
+			   "stringizing identifier containing UCN "
+			   "is supported only in C++");
+		break;
+	      }
+	}
+
       /* Room for each char being written in octal, initial space and
 	 final quote and NUL.  */
       len = cpp_token_len (token);
@@ -380,12 +393,12 @@ stringify_arg (cpp_reader *pfile, macro_
 	{
 	  _cpp_buff *buff = _cpp_get_buff (pfile, len);
 	  unsigned char *buf = BUFF_FRONT (buff);
-	  len = cpp_spell_token (pfile, token, buf) - buf;
+	  len = cpp_spell_token (pfile, token, buf, true) - buf;
 	  dest = cpp_quote_string (dest, buf, len);
 	  _cpp_release_buff (pfile, buff);
 	}
       else
-	dest = cpp_spell_token (pfile, token, dest);
+	dest = cpp_spell_token (pfile, token, dest, true);
 
       if (token->type == CPP_OTHER && token->val.str.text[0] == '\\')
 	backslash_count++;
@@ -422,7 +435,7 @@ paste_tokens (cpp_reader *pfile, const c
   lhs = *plhs;
   len = cpp_token_len (lhs) + cpp_token_len (rhs) + 1;
   buf = alloca (len);
-  end = cpp_spell_token (pfile, lhs, buf);
+  end = cpp_spell_token (pfile, lhs, buf, false);
 
   /* Avoid comment headers, since they are still processed in stage 3.
      It is simpler to insert a space here, rather than modifying the
@@ -430,7 +443,7 @@ paste_tokens (cpp_reader *pfile, const c
      false doesn't work, since we want to clear the PASTE_LEFT flag.  */
   if (lhs->type == CPP_DIV && rhs->type != CPP_EQ)
     *end++ = ' ';
-  end = cpp_spell_token (pfile, rhs, end);
+  end = cpp_spell_token (pfile, rhs, end, false);
   *end = '\n';
 
   cpp_push_buffer (pfile, buf, end - buf, /* from_stage3 */ true);
@@ -1751,7 +1764,7 @@ cpp_macro_definition (cpp_reader *pfile,
 	      buffer += NODE_LEN (macro->params[token->val.arg_no - 1]);
 	    }
 	  else
-	    buffer = cpp_spell_token (pfile, token, buffer);
+	    buffer = cpp_spell_token (pfile, token, buffer, false);
 
 	  if (token->flags & PASTE_LEFT)
 	    {
Index: libcpp/include/cpplib.h
===================================================================
RCS file: /cvs/gcc/gcc/libcpp/include/cpplib.h,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 cpplib.h
--- libcpp/include/cpplib.h	20 Feb 2005 17:01:32 -0000	1.9
+++ libcpp/include/cpplib.h	11 Mar 2005 01:53:55 -0000
@@ -637,7 +637,7 @@ extern unsigned int cpp_errors (cpp_read
 extern unsigned int cpp_token_len (const cpp_token *);
 extern unsigned char *cpp_token_as_text (cpp_reader *, const cpp_token *);
 extern unsigned char *cpp_spell_token (cpp_reader *, const cpp_token *,
-				       unsigned char *);
+				       unsigned char *, bool);
 extern void cpp_register_pragma (cpp_reader *, const char *, const char *,
 				 void (*) (cpp_reader *), bool);
 extern void cpp_handle_deferred_pragma (cpp_reader *, const cpp_string *);
Index: gcc/c-lex.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-lex.c,v
retrieving revision 1.242
diff -u -p -u -p -r1.242 c-lex.c
--- gcc/c-lex.c	27 Oct 2004 17:24:20 -0000	1.242
+++ gcc/c-lex.c	11 Mar 2005 01:53:56 -0000
@@ -420,7 +420,7 @@ c_lex_with_flags (tree *value, unsigned 
       {
 	unsigned char name[4];
 	
-	*cpp_spell_token (parse_in, tok, name) = 0;
+	*cpp_spell_token (parse_in, tok, name, true) = 0;
 	
 	error ("stray %qs in program", name);
       }
Index: gcc/testsuite/g++.dg/cpp/ucnid-1.C
===================================================================
RCS file: gcc/testsuite/g++.dg/cpp/ucnid-1.C
diff -N gcc/testsuite/g++.dg/cpp/ucnid-1.C
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/g++.dg/cpp/ucnid-1.C	11 Mar 2005 01:54:28 -0000
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+#include <cstdlib>
+#include <cstring>
+
+#define str(t) #t
+
+int main (void)
+{
+  const char s[] = str (\u30b2);
+
+  if (strcmp (s, "\u30b2") != 0)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/g++.dg/other/ucnid-1.C
===================================================================
RCS file: gcc/testsuite/g++.dg/other/ucnid-1.C
diff -N gcc/testsuite/g++.dg/other/ucnid-1.C
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/g++.dg/other/ucnid-1.C	11 Mar 2005 01:54:32 -0000
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+#include <cstdlib>
+
+int \u00C0(void) { return 1; }
+int \u00C1(void) { return 2; }
+int \U000000C2(void) { return 3; }
+int wh\u00ff(void) { return 4; }
+int a\u00c4b\u0441\U000003b4e(void) { return 5; }
+
+int main (void)
+{
+  
+  if (\u00C0() != 1)
+    abort ();
+  if (\u00c1() != 2)
+    abort ();
+  if (\u00C2() != 3)
+    abort ();
+  if (wh\u00ff() != 4)
+    abort ();
+  if (a\u00c4b\u0441\U000003b4e() != 5)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/ucnid-1.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ucnid-1.c
diff -N gcc/testsuite/gcc.dg/ucnid-1.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ucnid-1.c	11 Mar 2005 01:54:58 -0000
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99" } */
+void abort (void);
+
+int main (void)
+{
+  int \u00C0 = 1;
+  int \u00C1 = 2;
+  int \U000000C2 = 3;
+  int wh\u00ff = 4;
+  int a\u00c4b\u0441\U000003b4e = 5;
+  
+  if (\u00C0 != 1)
+    abort ();
+  if (\u00c1 != 2)
+    abort ();
+  if (\u00C2 != 3)
+    abort ();
+  if (wh\u00ff != 4)
+    abort ();
+  if (a\u00c4b\u0441\U000003b4e != 5)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/ucnid-2.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ucnid-2.c
diff -N gcc/testsuite/gcc.dg/ucnid-2.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ucnid-2.c	11 Mar 2005 01:54:58 -0000
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99" } */
+void abort (void);
+
+static int \u00C0 = 1;
+static int \u00C1 = 2;
+static int \U000000C2 = 3;
+static int wh\u00ff = 4;
+static int a\u00c4b\u0441\U000003b4e = 5;
+
+int main (void)
+{
+  
+  if (\u00C0 != 1)
+    abort ();
+  if (\u00c1 != 2)
+    abort ();
+  if (\u00C2 != 3)
+    abort ();
+  if (wh\u00ff != 4)
+    abort ();
+  if (a\u00c4b\u0441\U000003b4e != 5)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/ucnid-3.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ucnid-3.c
diff -N gcc/testsuite/gcc.dg/ucnid-3.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ucnid-3.c	11 Mar 2005 01:54:58 -0000
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99" } */
+void abort (void);
+
+int \u00C0 = 1;
+int \u00C1 = 2;
+int \U000000C2 = 3;
+int wh\u00ff = 4;
+int a\u00c4b\u0441\U000003b4e = 5;
+
+int main (void)
+{
+  
+  if (\u00C0 != 1)
+    abort ();
+  if (\u00c1 != 2)
+    abort ();
+  if (\u00C2 != 3)
+    abort ();
+  if (wh\u00ff != 4)
+    abort ();
+  if (a\u00c4b\u0441\U000003b4e != 5)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/ucnid-4.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ucnid-4.c
diff -N gcc/testsuite/gcc.dg/ucnid-4.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ucnid-4.c	11 Mar 2005 01:54:58 -0000
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99" } */
+void abort (void);
+
+int \u00C0(void) { return 1; }
+int \u00C1(void) { return 2; }
+int \U000000C2(void) { return 3; }
+int wh\u00ff(void) { return 4; }
+int a\u00c4b\u0441\U000003b4e(void) { return 5; }
+
+int main (void)
+{
+  
+  if (\u00C0() != 1)
+    abort ();
+  if (\u00c1() != 2)
+    abort ();
+  if (\u00C2() != 3)
+    abort ();
+  if (wh\u00ff() != 4)
+    abort ();
+  if (a\u00c4b\u0441\U000003b4e() != 5)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/ucnid-6.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ucnid-6.c
diff -N gcc/testsuite/gcc.dg/ucnid-6.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ucnid-6.c	11 Mar 2005 01:54:58 -0000
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99 -save-temps" } */
+void abort (void);
+
+int \u00C0(void) { return 1; }
+int \u00C1(void) { return 2; }
+int \U000000C2(void) { return 3; }
+int wh\u00ff(void) { return 4; }
+int a\u00c4b\u0441\U000003b4e(void) { return 5; }
+
+int main (void)
+{
+  
+  if (\u00C0() != 1)
+    abort ();
+  if (\u00c1() != 2)
+    abort ();
+  if (\u00C2() != 3)
+    abort ();
+  if (wh\u00ff() != 4)
+    abort ();
+  if (a\u00c4b\u0441\U000003b4e() != 5)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/cpp/ucnid-1.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/cpp/ucnid-1.c
diff -N gcc/testsuite/gcc.dg/cpp/ucnid-1.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/cpp/ucnid-1.c	11 Mar 2005 01:55:00 -0000
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99" } */
+void abort (void);
+
+#define \u00C0 1
+#define \u00C1 2
+#define \U000000C2 3
+#define wh\u00ff 4
+#define a\u00c4b\u0441\U000003b4e 5
+
+int main (void)
+{
+  
+  if (\u00C0 != 1)
+    abort ();
+  if (\u00c1 != 2)
+    abort ();
+  if (\u00C2 != 3)
+    abort ();
+  if (wh\u00ff != 4)
+    abort ();
+  if (a\u00c4b\u0441\U000003b4e != 5)
+    abort ();
+  
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/cpp/ucnid-2.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/cpp/ucnid-2.c
diff -N gcc/testsuite/gcc.dg/cpp/ucnid-2.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/cpp/ucnid-2.c	11 Mar 2005 01:55:00 -0000
@@ -0,0 +1,4 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99" } */
+#define str(x) #x
+const char s[] = str(\u00d3);  /* { dg-error "identifier containing UCN" } */
Index: gcc/testsuite/gcc.dg/cpp/ucnid-3.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/cpp/ucnid-3.c
diff -N gcc/testsuite/gcc.dg/cpp/ucnid-3.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/cpp/ucnid-3.c	11 Mar 2005 01:55:00 -0000
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+
+#define paste(x, y) x ## y
+
+int paste(\u00aa, \u0531) = 3;
+
============================================================
Follow-Ups:
- Re: UCNs in identifiers
  - From: Joseph S. Myers
- Re: UCNs in identifiers
  - From: Neil Booth
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]