This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix wide character literals (take 2)


On Tue, Mar 12, 2002 at 06:31:51PM +0000, Neil Booth wrote:
> Jakub Jelinek wrote:-
> 
> > Here is a fix (note that pre-3.0 c-lex used here token_getch which
> > gave unsigned character in int variable, not signed).
> 
> > --- gcc/c-lex.c.jj	Fri Feb  1 14:34:16 2002
> > +++ gcc/c-lex.c	Tue Mar 12 16:46:12 2002
> > @@ -1333,7 +1333,7 @@ lex_string (str, len, wide)
> >  	  c = wc;
> >  	}
> >  #else
> > -      c = *p++;
> > +      c = *(const unsigned char *) p++;
> >  #endif
> >  
> 
> I think you should make p and limit a const unsigned char *.   Then,
> rather than adding a cast, you can take a lot away, including the caller
> and the functions it calls.
> 
> Note that just above the above lines there is another c = *p++; which
> has no reason to be different.

You're right.
Here is what I have bootstrapped with no regressions on i386-redhat-linux.
Ok to commit this instead?

2002-03-12  Jakub Jelinek  <jakub@redhat.com>

	* c-lex.c (cb_ident, c_lex): Remove unnecessary cast.
	(lex_string): Use unsigned char pointers.

	* gcc.c-torture/execute/wchar_t-1.c: New test.

--- gcc/c-lex.c.jj	Fri Feb  1 14:34:16 2002
+++ gcc/c-lex.c	Tue Mar 12 20:51:43 2002
@@ -85,7 +85,8 @@ static int ignore_escape_flag;
 
 static void parse_float		PARAMS ((PTR));
 static tree lex_number		PARAMS ((const char *, unsigned int));
-static tree lex_string		PARAMS ((const char *, unsigned int, int));
+static tree lex_string		PARAMS ((const unsigned char *, unsigned int,
+					 int));
 static tree lex_charconst	PARAMS ((const cpp_token *));
 static void update_header_times	PARAMS ((const char *));
 static int dump_one_header	PARAMS ((splay_tree_node, void *));
@@ -239,7 +240,7 @@ cb_ident (pfile, line, str)
   if (! flag_no_ident)
     {
       /* Convert escapes in the string.  */
-      tree value = lex_string ((const char *)str->text, str->len, 0);
+      tree value = lex_string (str->text, str->len, 0);
       ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
     }
 #endif
@@ -807,8 +808,8 @@ c_lex (value)
 
     case CPP_STRING:
     case CPP_WSTRING:
-      *value = lex_string ((const char *)tok->val.str.text,
-			   tok->val.str.len, tok->type == CPP_WSTRING);
+      *value = lex_string (tok->val.str.text, tok->val.str.len,
+			   tok->type == CPP_WSTRING);
       break;
 
       /* These tokens should not be visible outside cpplib.  */
@@ -1297,14 +1298,14 @@ lex_number (str, len)
 
 static tree
 lex_string (str, len, wide)
-     const char *str;
+     const unsigned char *str;
      unsigned int len;
      int wide;
 {
   tree value;
   char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
   char *q = buf;
-  const char *p = str, *limit = str + len;
+  const unsigned char *p = str, *limit = str + len;
   unsigned int c;
   unsigned width = wide ? WCHAR_TYPE_SIZE
 			: TYPE_PRECISION (char_type_node);
@@ -1320,7 +1321,7 @@ lex_string (str, len, wide)
       wchar_t wc;
       int char_len;
 
-      char_len = local_mbtowc (&wc, p, limit - p);
+      char_len = local_mbtowc (&wc, (const char *) p, limit - p);
       if (char_len == -1)
 	{
 	  warning ("ignoring invalid multibyte character");
@@ -1344,8 +1345,7 @@ lex_string (str, len, wide)
 	    mask = ((unsigned int) 1 << width) - 1;
 	  else
 	    mask = ~0;
-	  c = cpp_parse_escape (parse_in, (const unsigned char **) &p,
-				(const unsigned char *) limit,
+	  c = cpp_parse_escape (parse_in, &p, limit,
 				mask, flag_traditional);
 	}
 	
--- gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c.jj	Tue Mar 12 16:58:40 2002
+++ gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c	Tue Mar 12 16:53:00 2002
@@ -0,0 +1,16 @@
+typedef __WCHAR_TYPE__ wchar_t;
+wchar_t x[] = L"Ä";
+wchar_t y = L'Ä';
+extern void abort (void);
+extern void exit (int);
+
+int main (void)
+{
+  if (sizeof (x) / sizeof (wchar_t) != 2)
+    abort ();
+  if (x[0] != L'Ä' || x[1] != L'\0')
+    abort ();
+  if (y != L'Ä')
+    abort ();
+  exit (0);
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]