This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Unreviewed-patch: common cpp/C front-end charconst handling


The original patch was submitted about 3 weeks ago as

http://gcc.gnu.org/ml/gcc-patches/2001-03/msg00354.html

OK to commit, trunk only?

Neil.

        * c-lex.c (lex_charconst): Convert into a simple wrapper
        around cpp_interpret_charconst, to which most of the code
        body is moved.
        * cppexp.c (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE,
        MAX_LONG_TYPE_SIZE, MAX_INT_TYPE_SIZE, MAX_CHAR_TYPE_MASK,
        MAX_WCHAR_TYPE_MASK, parse_escape, parse_charconst): Remove.
        (lex): Use cpp_interpret_charconst.
        * cpplex.c (parse_escape, read_ucs, cpp_interpret_charconst,
        hex_digit_value): New functions.
        (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): New macros.
        * cpplib.h (cpp_interpret_charconst): New prototype.
        * Makefile.in (cpplex.o): Depend on mbchar.h.
        (gccnew.o): New rule.
        * cppinternals.texi: Fix typo.

Index: gcc/Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.633
diff -u -p -c -r1.633 Makefile.in
*** Makefile.in	2001/03/28 17:50:11	1.633
--- Makefile.in	2001/03/28 19:43:51
*************** DRIVER_DEFINES = \
*** 1297,1302 ****
--- 1297,1304 ----
    `test "$${SHLIB_LINK}" -a "@enable_shared@" = "yes" && echo "-DENABLE_SHARED_LIBGCC"` \
    `test "$${SHLIB_MULTILIB}" && echo "-DNO_SHARED_LIBGCC_MULTILIB"`
  
+ gccnew.o: gccnew.c $(CONFIG_H) system.h intl.h
+ 
  gcc.o: gcc.c $(CONFIG_H) $(SYSTEM_H) intl.h multilib.h \
      Makefile $(lang_specs_files) prefix.h $(GCC_H)
  	(SHLIB_LINK='$(SHLIB_LINK)' \
*************** cppmain.o:  cppmain.c  $(CONFIG_H) cppli
*** 1952,1958 ****
  
  cpperror.o: cpperror.c $(CONFIG_H) $(LIBCPP_DEPS)
  cppexp.o:   cppexp.c   $(CONFIG_H) $(LIBCPP_DEPS)
! cpplex.o:   cpplex.c   $(CONFIG_H) $(LIBCPP_DEPS)
  cppmacro.o: cppmacro.c $(CONFIG_H) $(LIBCPP_DEPS)
  cpplib.o:   cpplib.c   $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
  cpphash.o:  cpphash.c  $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
--- 1954,1960 ----
  
  cpperror.o: cpperror.c $(CONFIG_H) $(LIBCPP_DEPS)
  cppexp.o:   cppexp.c   $(CONFIG_H) $(LIBCPP_DEPS)
! cpplex.o:   cpplex.c   $(CONFIG_H) $(LIBCPP_DEPS) mbchar.h
  cppmacro.o: cppmacro.c $(CONFIG_H) $(LIBCPP_DEPS)
  cpplib.o:   cpplib.c   $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
  cpphash.o:  cpphash.c  $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
Index: gcc/c-lex.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-lex.c,v
retrieving revision 1.133
diff -u -p -c -r1.133 c-lex.c
*** c-lex.c	2001/03/07 01:31:59	1.133
--- c-lex.c	2001/03/28 19:43:57
*************** static const char *read_ucs 	PARAMS ((co
*** 86,92 ****
  static void parse_float		PARAMS ((PTR));
  static tree lex_number		PARAMS ((const char *, unsigned int));
  static tree lex_string		PARAMS ((const char *, unsigned int, int));
! static tree lex_charconst	PARAMS ((const char *, unsigned int, int));
  static void update_header_times	PARAMS ((const char *));
  static int dump_one_header	PARAMS ((splay_tree_node, void *));
  static void cb_ident		PARAMS ((cpp_reader *, const cpp_string *));
--- 86,92 ----
  static void parse_float		PARAMS ((PTR));
  static tree lex_number		PARAMS ((const char *, unsigned int));
  static tree lex_string		PARAMS ((const char *, unsigned int, int));
! static tree lex_charconst	PARAMS ((const cpp_token *));
  static void update_header_times	PARAMS ((const char *));
  static int dump_one_header	PARAMS ((splay_tree_node, void *));
  static void cb_ident		PARAMS ((cpp_reader *, const cpp_string *));
*************** c_lex (value)
*** 1008,1015 ****
  
      case CPP_CHAR:
      case CPP_WCHAR:
!       *value = lex_charconst ((const char *)tok.val.str.text,
! 			      tok.val.str.len, tok.type == CPP_WCHAR);
        break;
  
      case CPP_STRING:
--- 1008,1014 ----
  
      case CPP_CHAR:
      case CPP_WCHAR:
!       *value = lex_charconst (&tok);
        break;
  
      case CPP_STRING:
*************** lex_string (str, len, wide)
*** 1608,1718 ****
  }
  
  static tree
! lex_charconst (str, len, wide)
!      const char *str;
!      unsigned int len;
!      int wide;
  {
!   const char *limit = str + len;
!   int result = 0;
!   int num_chars = 0;
!   int chars_seen = 0;
!   unsigned width = TYPE_PRECISION (char_type_node);
!   int max_chars;
!   unsigned int c;
    tree value;
  
! #ifdef MULTIBYTE_CHARS
!   int longest_char = local_mb_cur_max ();
!   (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
! #endif
! 
!   max_chars = TYPE_PRECISION (integer_type_node) / width;
!   if (wide)
!     width = WCHAR_TYPE_SIZE;
! 
!   while (str < limit)
      {
! #ifdef MULTIBYTE_CHARS
!       wchar_t wc;
!       int char_len;
! 
!       char_len = local_mbtowc (&wc, str, limit - str);
!       if (char_len == -1)
! 	{
! 	  warning ("Ignoring invalid multibyte character");
! 	  char_len = 1;
! 	  c = *str++;
! 	}
!       else
! 	{
! 	  str += char_len;
! 	  c = wc;
! 	}
! #else
!       c = *str++;
! #endif
! 
!       ++chars_seen;
!       if (c == '\\')
! 	{
! 	  str = readescape (str, limit, &c);
! 	  if (width < HOST_BITS_PER_INT
! 	      && (unsigned) c >= ((unsigned)1 << width))
! 	    pedwarn ("escape sequence out of range for character");
! 	}
! #ifdef MAP_CHARACTER
!       if (ISPRINT (c))
! 	c = MAP_CHARACTER (c);
! #endif
!       
!       /* Merge character into result; ignore excess chars.  */
!       num_chars += (width / TYPE_PRECISION (char_type_node));
!       if (num_chars < max_chars + 1)
! 	{
! 	  if (width < HOST_BITS_PER_INT)
! 	    result = (result << width) | (c & ((1 << width) - 1));
! 	  else
! 	    result = c;
! 	}
!     }
! 
!   if (chars_seen == 0)
!     error ("empty character constant");
!   else if (num_chars > max_chars)
!     {
!       num_chars = max_chars;
!       error ("character constant too long");
      }
!   else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
!     warning ("multi-character character constant");
! 
!   /* If char type is signed, sign-extend the constant.  */
!   if (! wide)
      {
!       int num_bits = num_chars * width;
!       if (num_bits == 0)
! 	/* We already got an error; avoid invalid shift.  */
! 	value = build_int_2 (0, 0);
!       else if (TREE_UNSIGNED (char_type_node)
! 	       || ((result >> (num_bits - 1)) & 1) == 0)
! 	value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
! 				       >> (HOST_BITS_PER_WIDE_INT - num_bits)),
! 			     0);
        else
! 	value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
! 					>> (HOST_BITS_PER_WIDE_INT - num_bits)),
! 			     -1);
!       /* In C, a character constant has type 'int'; in C++, 'char'.  */
!       if (chars_seen <= 1 && c_language == clk_cplusplus)
  	TREE_TYPE (value) = char_type_node;
        else
  	TREE_TYPE (value) = integer_type_node;
-     }
-   else
-     {
-       value = build_int_2 (result, 0);
-       TREE_TYPE (value) = wchar_type_node;
      }
  
    return value;
--- 1607,1639 ----
  }
  
  static tree
! lex_charconst (token)
!      const cpp_token *token;
  {
!   HOST_WIDE_INT result;
    tree value;
+   unsigned int chars_seen;
  
!   result = cpp_interpret_charconst (parse_in, token, warn_multichar,
! 				    flag_traditional, &chars_seen);
!   if (token->type == CPP_WCHAR)
      {
!       value = build_int_2 (result, 0);
!       TREE_TYPE (value) = wchar_type_node;
      }
!   else
      {
!       if (result < 0)
! 	value = build_int_2 (result, -1);
        else
! 	value = build_int_2 (result, 0);
! 
!       /* In C, a character constant has type 'int'.
! 	 In C++ 'char', but multi-char charconsts have type 'int'.  */
!       if (c_language == clk_cplusplus && chars_seen <= 1)
  	TREE_TYPE (value) = char_type_node;
        else
  	TREE_TYPE (value) = integer_type_node;
      }
  
    return value;
Index: gcc/cppexp.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppexp.c,v
retrieving revision 1.90
diff -u -p -c -r1.90 cppexp.c
*** cppexp.c	2001/03/03 11:32:31	1.90
--- cppexp.c	2001/03/28 19:44:01
*************** along with this program; if not, write t
*** 18,54 ****
  Foundation, 59 Temple Place - Suite 330,
  Boston, MA 02111-1307, USA.  */
  
- /* Parse a C expression from text in a string  */
-    
  #include "config.h"
  #include "system.h"
  #include "cpplib.h"
  #include "cpphash.h"
  
- #ifndef MAX_CHAR_TYPE_SIZE
- #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
- #endif
- 
- #ifndef MAX_INT_TYPE_SIZE
- #define MAX_INT_TYPE_SIZE INT_TYPE_SIZE
- #endif
- 
- #ifndef MAX_LONG_TYPE_SIZE
- #define MAX_LONG_TYPE_SIZE LONG_TYPE_SIZE
- #endif
- 
- #ifndef MAX_WCHAR_TYPE_SIZE
- #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
- #endif
- 
- #define MAX_CHAR_TYPE_MASK (MAX_CHAR_TYPE_SIZE < HOST_BITS_PER_WIDEST_INT \
- 		    ? (~(~(HOST_WIDEST_INT) 0 << MAX_CHAR_TYPE_SIZE)) \
- 		    : ~ (HOST_WIDEST_INT) 0)
- 
- #define MAX_WCHAR_TYPE_MASK (MAX_WCHAR_TYPE_SIZE < HOST_BITS_PER_WIDEST_INT \
- 			     ? ~(~(HOST_WIDEST_INT) 0 << MAX_WCHAR_TYPE_SIZE) \
- 			     : ~ (HOST_WIDEST_INT) 0)
- 
  /* Yield nonzero if adding two numbers with A's and B's signs can yield a
     number with SUM's sign, where A, B, and SUM are all C integers.  */
  #define possible_sum_sign(a, b, sum) ((((a) ^ (b)) | ~ ((a) ^ (sum))) < 0)
--- 18,28 ----
*************** static HOST_WIDEST_INT right_shift PARAM
*** 61,70 ****
  					    unsigned int,
  					    unsigned HOST_WIDEST_INT));
  static struct op parse_number PARAMS ((cpp_reader *, const cpp_token *));
- static struct op parse_charconst PARAMS ((cpp_reader *, const cpp_token *));
  static struct op parse_defined PARAMS ((cpp_reader *));
- static HOST_WIDEST_INT parse_escape PARAMS ((cpp_reader *, const U_CHAR **,
- 					     const U_CHAR *, HOST_WIDEST_INT));
  static struct op lex PARAMS ((cpp_reader *, int, cpp_token *));
  static const unsigned char *op_as_text PARAMS ((cpp_reader *, enum cpp_ttype));
  
--- 35,41 ----
*************** parse_number (pfile, tok)
*** 238,318 ****
    return op;
  }
  
- /* Parse and convert a character constant for #if.  Understands backslash
-    escapes (\n, \031) and multibyte characters (if so configured).  */
- static struct op
- parse_charconst (pfile, tok)
-      cpp_reader *pfile;
-      const cpp_token *tok;
- {
-   struct op op;
-   HOST_WIDEST_INT result = 0;
-   int num_chars = 0;
-   int num_bits;
-   unsigned int width = MAX_CHAR_TYPE_SIZE;
-   HOST_WIDEST_INT mask = MAX_CHAR_TYPE_MASK;
-   int max_chars;
-   const U_CHAR *ptr = tok->val.str.text;
-   const U_CHAR *end = ptr + tok->val.str.len;
- 
-   int c = -1;
- 
-   if (tok->type == CPP_WCHAR)
-     width = MAX_WCHAR_TYPE_SIZE, mask = MAX_WCHAR_TYPE_MASK;
-   max_chars = MAX_LONG_TYPE_SIZE / width;
- 
-   while (ptr < end)
-     {
-       c = *ptr++;
-       if (c == '\'')
- 	CPP_ICE ("unescaped ' in character constant");
-       else if (c == '\\')
- 	{
- 	  c = parse_escape (pfile, &ptr, end, mask);
- 	  if (width < HOST_BITS_PER_INT
- 	      && (unsigned int) c >= (unsigned int)(1 << width))
- 	    cpp_pedwarn (pfile,
- 			 "escape sequence out of range for character");
- 	}
- 	  
-       /* Merge character into result; ignore excess chars.  */
-       if (++num_chars <= max_chars)
- 	{
- 	  if (width < HOST_BITS_PER_INT)
- 	    result = (result << width) | (c & ((1 << width) - 1));
- 	  else
- 	    result = c;
- 	}
-     }
- 
-   if (num_chars == 0)
-     SYNTAX_ERROR ("empty character constant");
-   else if (num_chars > max_chars)
-     SYNTAX_ERROR ("character constant too long");
-   else if (num_chars != 1)
-     cpp_warning (pfile, "multi-character character constant");
- 
-   /* If char type is signed, sign-extend the constant.  */
-   num_bits = num_chars * width;
-       
-   if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
-       || ((result >> (num_bits - 1)) & 1) == 0)
-     op.value = result & ((unsigned HOST_WIDEST_INT) ~0
- 			 >> (HOST_BITS_PER_WIDEST_INT - num_bits));
-   else
-     op.value = result | ~((unsigned HOST_WIDEST_INT) ~0
- 			  >> (HOST_BITS_PER_WIDEST_INT - num_bits));
- 
-   /* This is always a signed type.  */
-   op.unsignedp = 0;
-   op.op = CPP_INT;
-   return op;
- 
-  syntax_error:
-   op.op = CPP_ERROR;
-   return op;
- }
- 
  static struct op
  parse_defined (pfile)
       cpp_reader *pfile;
--- 209,214 ----
*************** lex (pfile, skip_evaluation, token)
*** 405,411 ****
  
      case CPP_CHAR:
      case CPP_WCHAR:
!       return parse_charconst (pfile, token);
  
      case CPP_STRING:
      case CPP_WSTRING:
--- 301,315 ----
  
      case CPP_CHAR:
      case CPP_WCHAR:
!       {
! 	unsigned int chars_seen;
! 
! 	/* This is always a signed type.  */
! 	op.unsignedp = 0;
! 	op.op = CPP_INT;
! 	op.value = cpp_interpret_charconst (pfile, token, 1, 0, &chars_seen);
! 	return op;
!       }
  
      case CPP_STRING:
      case CPP_WSTRING:
*************** lex (pfile, skip_evaluation, token)
*** 492,593 ****
   syntax_error:
    op.op = CPP_ERROR;
    return op;
- }
- 
- /* Parse a C escape sequence.  STRING_PTR points to a variable
-    containing a pointer to the string to parse.  That pointer
-    is updated past the characters we use.  The value of the
-    escape sequence is returned.
- 
-    If \ is followed by 000, we return 0 and leave the string pointer
-    after the zeros.  A value of 0 does not mean end of string.  */
- 
- static HOST_WIDEST_INT
- parse_escape (pfile, string_ptr, limit, result_mask)
-      cpp_reader *pfile;
-      const U_CHAR **string_ptr;
-      const U_CHAR *limit;
-      HOST_WIDEST_INT result_mask;
- {
-   const U_CHAR *ptr = *string_ptr;
-   /* We know we have at least one following character.  */
-   int c = *ptr++;
-   switch (c)
-     {
-     case 'a': c = TARGET_BELL;	  break;
-     case 'b': c = TARGET_BS;	  break;
-     case 'f': c = TARGET_FF;	  break;
-     case 'n': c = TARGET_NEWLINE; break;
-     case 'r': c = TARGET_CR;	  break;
-     case 't': c = TARGET_TAB;	  break;
-     case 'v': c = TARGET_VT;	  break;
- 
-     case 'e': case 'E':
-       if (CPP_PEDANTIC (pfile))
- 	cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
-       c = TARGET_ESC;
-       break;
-       
-     case '0': case '1': case '2': case '3':
-     case '4': case '5': case '6': case '7':
-       {
- 	unsigned int i = c - '0';
- 	int count = 0;
- 	while (++count < 3)
- 	  {
- 	    if (ptr >= limit)
- 	      break;
- 	    
- 	    c = *ptr;
- 	    if (c < '0' || c > '7')
- 	      break;
- 	    ptr++;
- 	    i = (i << 3) + c - '0';
- 	  }
- 	if (i != (i & result_mask))
- 	  {
- 	    i &= result_mask;
- 	    cpp_pedwarn (pfile, "octal escape sequence out of range");
- 	  }
- 	c = i;
- 	break;
-       }
- 
-     case 'x':
-       {
- 	unsigned int i = 0, overflow = 0;
- 	int digits_found = 0, digit;
- 	for (;;)
- 	  {
- 	    if (ptr >= limit)
- 	      break;
- 	    c = *ptr;
- 	    if (c >= '0' && c <= '9')
- 	      digit = c - '0';
- 	    else if (c >= 'a' && c <= 'f')
- 	      digit = c - 'a' + 10;
- 	    else if (c >= 'A' && c <= 'F')
- 	      digit = c - 'A' + 10;
- 	    else
- 	      break;
- 	    ptr++;
- 	    overflow |= i ^ (i << 4 >> 4);
- 	    i = (i << 4) + digit;
- 	    digits_found = 1;
- 	  }
- 	if (!digits_found)
- 	  cpp_error (pfile, "\\x used with no following hex digits");
- 	if (overflow | (i != (i & result_mask)))
- 	  {
- 	    i &= result_mask;
- 	    cpp_pedwarn (pfile, "hex escape sequence out of range");
- 	  }
- 	c = i;
- 	break;
-       }
-     }
-   *string_ptr = ptr;
-   return c;
  }
  
  static void
--- 396,401 ----
Index: gcc/cppinternals.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppinternals.texi,v
retrieving revision 1.5
diff -u -p -c -r1.5 cppinternals.texi
*** cppinternals.texi	2001/03/12 23:53:35	1.5
--- cppinternals.texi	2001/03/28 19:44:05
*************** macro is defined when we leave the heade
*** 391,397 ****
  the host supports it, we try to map suitably large files into memory,
  rather than reading them in directly.
  
! The include paths are intenally stored on a null-terminated
  singly-linked list, starting with the @code{"header.h"} directory search
  chain, which then links into the @code{<header.h>} directory chain.
  
--- 391,397 ----
  the host supports it, we try to map suitably large files into memory,
  rather than reading them in directly.
  
! The include paths are internally stored on a null-terminated
  singly-linked list, starting with the @code{"header.h"} directory search
  chain, which then links into the @code{<header.h>} directory chain.
  
Index: gcc/cpplex.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cpplex.c,v
retrieving revision 1.136
diff -u -p -c -r1.136 cpplex.c
*** cpplex.c	2001/03/27 15:31:45	1.136
--- cpplex.c	2001/03/28 19:44:11
*************** Foundation, 59 Temple Place - Suite 330,
*** 39,44 ****
--- 39,56 ----
  #include "cpplib.h"
  #include "cpphash.h"
  
+ /* MULTIBYTE_CHARS support only works for native compilers.
+    ??? Ideally what we want is to model widechar support after
+    the current floating point support.  */
+ #ifdef CROSS_COMPILE
+ #undef MULTIBYTE_CHARS
+ #endif
+ 
+ #ifdef MULTIBYTE_CHARS
+ #include "mbchar.h"
+ #include <locale.h>
+ #endif
+ 
  /* Tokens with SPELL_STRING store their spelling in the token list,
     and it's length in the token->val.name.len.  */
  enum spell_type
*************** static void save_comment PARAMS ((cpp_re
*** 86,94 ****
--- 98,112 ----
  static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
  static int name_p PARAMS ((cpp_reader *, const cpp_string *));
+ static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
+ 					  const unsigned char *, HOST_WIDE_INT,
+ 					  int));
+ static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
+ 				      const unsigned char *, unsigned int));
  
  static cpp_chunk *new_chunk PARAMS ((unsigned int));
  static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
+ static unsigned int hex_digit_value PARAMS ((unsigned int));
  
  /* Utility routine:
  
*************** cpp_output_line (pfile, fp)
*** 1638,1643 ****
--- 1656,1986 ----
      }
  
    putc ('\n', fp);
+ }
+ 
+ /* Returns the value of a hexadecimal digit.  */
+ static unsigned int
+ hex_digit_value (c)
+      unsigned int c;
+ {
+   if (c >= 'a' && c <= 'f')
+     return c - 'a' + 10;
+   if (c >= 'A' && c <= 'F')
+     return c - 'A' + 10;
+   if (c >= '0' && c <= '9')
+     return c - '0';
+   abort ();
+ }
+ 
+ /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
+ 
+    [lex.charset]: The character designated by the universal character
+    name \UNNNNNNNN is that character whose character short name in
+    ISO/IEC 10646 is NNNNNNNN; the character designated by the
+    universal character name \uNNNN is that character whose character
+    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
+    for a universal character name is less than 0x20 or in the range
+    0x7F-0x9F (inclusive), or if the universal character name
+    designates a character in the basic source character set, then the
+    program is ill-formed.
+ 
+    We assume that wchar_t is Unicode, so we don't need to do any
+    mapping.  Is this ever wrong?  */
+ 
+ static unsigned int
+ read_ucs (pfile, pstr, limit, length)
+      cpp_reader *pfile;
+      const unsigned char **pstr;
+      const unsigned char *limit;
+      unsigned int length;
+ {
+   const unsigned char *p = *pstr;
+   unsigned int c, code = 0;
+ 
+   for (; length; --length)
+     {
+       if (p >= limit)
+ 	{
+ 	  cpp_error (pfile, "incomplete universal-character-name");
+ 	  break;
+ 	}
+ 
+       c = *p;
+       if (ISXDIGIT (c))
+ 	{
+ 	  code = (code << 4) + hex_digit_value (c);
+ 	  p++;
+ 	}
+       else
+ 	{
+ 	  cpp_error (pfile,
+ 		     "non-hex digit '%c' in universal-character-name", c);
+ 	  break;
+ 	}
+ 
+     }
+ 
+ #ifdef TARGET_EBCDIC
+   cpp_error (pfile, "universal-character-name on EBCDIC target");
+   code = 0x3f;  /* EBCDIC invalid character */
+ #else
+   if (code > 0x9f && !(code & 0x80000000))
+     ; /* True extended character, OK.  */
+   else if (code >= 0x20 && code < 0x7f)
+     {
+       /* ASCII printable character.  The C character set consists of all of
+ 	 these except $, @ and `.  We use hex escapes so that this also
+ 	 works with EBCDIC hosts.  */
+       if (code != 0x24 && code != 0x40 && code != 0x60)
+ 	cpp_error (pfile, "universal-character-name used for '%c'", code);
+     }
+   else
+     cpp_error (pfile, "invalid universal-character-name");
+ #endif
+ 
+   *pstr = p;
+   return code;
+ }
+ 
+ /* Interpret an escape sequence, and return its value.  PSTR points to
+    the input pointer, which is just after the backslash.  LIMIT is the
+    end of the text.  MASK is a bit mask covering the target type (char
+    or wchar_t).  If TRADITIONAL, \a and \x yield plain 'a' and 'x'.  */
+ 
+ static unsigned int
+ parse_escape (pfile, pstr, limit, mask, traditional)
+      cpp_reader *pfile;
+      const unsigned char **pstr;
+      const unsigned char *limit;
+      HOST_WIDE_INT mask;
+      int traditional;
+ {
+   int unknown = 0;
+   const unsigned char *str = *pstr;
+   unsigned int c = *str++;
+ 
+   switch (c)
+     {
+     case '\\': case '\'': case '"': case '?': break;
+     case 'b': c = TARGET_BS;	  break;
+     case 'f': c = TARGET_FF;	  break;
+     case 'n': c = TARGET_NEWLINE; break;
+     case 'r': c = TARGET_CR;	  break;
+     case 't': c = TARGET_TAB;	  break;
+     case 'v': c = TARGET_VT;	  break;
+ 
+     case '(': case '{': case '[': case '%':
+       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
+ 	 '\%' is used to prevent SCCS from getting confused.  */
+       unknown = CPP_PEDANTIC (pfile);
+       break;
+ 
+     case 'a':
+       if (CPP_WTRADITIONAL (pfile))
+ 	cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
+       if (!traditional)
+ 	c = TARGET_BELL;
+       break;
+ 
+     case 'e': case 'E':
+       if (CPP_PEDANTIC (pfile))
+ 	cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
+       c = TARGET_ESC;
+       break;
+       
+       /* Warnings and support checks handled by read_ucs().  */
+     case 'u': case 'U':
+       if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
+ 	{
+ 	  if (CPP_WTRADITIONAL (pfile))
+ 	    cpp_warning (pfile,
+ 			 "the meaning of '\\%c' varies with -traditional", c);
+ 	  c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
+ 	}
+       else
+ 	unknown = 1;
+       break;
+ 
+     case 'x':
+       if (CPP_WTRADITIONAL (pfile))
+ 	cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
+ 
+       if (!traditional)
+ 	{
+ 	  unsigned int i = 0, overflow = 0;
+ 	  int digits_found = 0;
+ 
+ 	  while (str < limit)
+ 	    {
+ 	      c = *str;
+ 	      if (! ISXDIGIT (c))
+ 		break;
+ 	      str++;
+ 	      overflow |= i ^ (i << 4 >> 4);
+ 	      i = (i << 4) + hex_digit_value (c);
+ 	      digits_found = 1;
+ 	    }
+ 
+ 	  if (!digits_found)
+ 	    cpp_error (pfile, "\\x used with no following hex digits");
+ 
+ 	  if (overflow | (i != (i & mask)))
+ 	    {
+ 	      cpp_pedwarn (pfile, "hex escape sequence out of range");
+ 	      i &= mask;
+ 	    }
+ 	  c = i;
+ 	}
+       break;
+ 
+     case '0':  case '1':  case '2':  case '3':
+     case '4':  case '5':  case '6':  case '7':
+       {
+ 	unsigned int i = c - '0';
+ 	int count = 0;
+ 
+ 	while (str < limit && ++count < 3)
+ 	  {
+ 	    c = *str;
+ 	    if (c < '0' || c > '7')
+ 	      break;
+ 	    str++;
+ 	    i = (i << 3) + c - '0';
+ 	  }
+ 
+ 	if (i != (i & mask))
+ 	  {
+ 	    cpp_pedwarn (pfile, "octal escape sequence out of range");
+ 	    i &= mask;
+ 	  }
+ 	c = i;
+       }
+       break;
+ 
+     default:
+       unknown = 1;
+       break;
+     }
+ 
+   if (unknown)
+     {
+       if (ISGRAPH (c))
+ 	cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
+       else
+ 	cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
+     }
+ 
+   *pstr = str;
+   return c;
+ }
+ 
+ #ifndef MAX_CHAR_TYPE_SIZE
+ #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
+ #endif
+ 
+ #ifndef MAX_WCHAR_TYPE_SIZE
+ #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
+ #endif
+ 
+ HOST_WIDE_INT
+ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
+      cpp_reader *pfile;
+      const cpp_token *token;
+      int warn_multi;
+      int traditional;
+      unsigned int *pchars_seen;
+ {
+   const unsigned char *str = token->val.str.text;
+   const unsigned char *limit = str + token->val.str.len;
+   unsigned int chars_seen = 0;
+   unsigned int width, max_chars, c;
+   HOST_WIDE_INT result = 0, mask;
+ 
+ #ifdef MULTIBYTE_CHARS
+   (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
+ #endif
+ 
+   /* Width in bits.  */
+   if (token->type == CPP_CHAR)
+     width = MAX_CHAR_TYPE_SIZE;
+   else
+     width = MAX_WCHAR_TYPE_SIZE;
+ 
+   if (width < HOST_BITS_PER_WIDE_INT)
+     mask = ((unsigned) 1 << width) - 1;
+   else
+     mask = ~0;
+   max_chars = HOST_BITS_PER_WIDE_INT / width;
+ 
+   while (str < limit)
+     {
+ #ifdef MULTIBYTE_CHARS
+       wchar_t wc;
+       int char_len;
+ 
+       char_len = local_mbtowc (&wc, str, limit - str);
+       if (char_len == -1)
+ 	{
+ 	  cpp_warning (pfile, "ignoring invalid multibyte character");
+ 	  c = *str++;
+ 	}
+       else
+ 	{
+ 	  str += char_len;
+ 	  c = wc;
+ 	}
+ #else
+       c = *str++;
+ #endif
+ 
+       if (c == '\\')
+ 	{
+ 	  c = parse_escape (pfile, &str, limit, mask, traditional);
+ 	  if (width < HOST_BITS_PER_WIDE_INT && c >= ((unsigned) 1 << width))
+ 	    cpp_pedwarn (pfile, "escape sequence out of range for character");
+ 	}
+ 
+ #ifdef MAP_CHARACTER
+       if (ISPRINT (c))
+ 	c = MAP_CHARACTER (c);
+ #endif
+       
+       /* Merge character into result; ignore excess chars.  */
+       if (++chars_seen <= max_chars)
+ 	{
+ 	  if (width < HOST_BITS_PER_WIDE_INT)
+ 	    result = (result << width) | (c & ((1 << width) - 1));
+ 	  else
+ 	    result = c;
+ 	}
+     }
+ 
+   if (chars_seen == 0)
+     cpp_error (pfile, "empty character constant");
+   else if (chars_seen > max_chars)
+     {
+       chars_seen = max_chars;
+       cpp_error (pfile, "character constant too long");
+     }
+   else if (chars_seen > 1 && !traditional && warn_multi)
+     cpp_warning (pfile, "multi-character character constant");
+ 
+   /* If char type is signed, sign-extend the constant.  The
+      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
+   if (token->type == CPP_CHAR && chars_seen)
+     {
+       unsigned int nbits = chars_seen * width;
+       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
+ 
+       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
+ 	  || ((result >> (nbits - 1)) & 1) == 0)
+ 	result &= mask;
+       else
+ 	result |= ~mask;
+     }
+ 
+   *pchars_seen = chars_seen;
+   return result;
  }
  
  /* Memory pools.  */
Index: gcc/cpplib.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cpplib.h,v
retrieving revision 1.169
diff -u -p -c -r1.169 cpplib.h
*** cpplib.h	2001/03/07 01:32:00	1.169
--- cpplib.h	2001/03/28 19:44:15
*************** extern const cpp_lexer_pos *cpp_get_line
*** 542,547 ****
--- 542,552 ----
  extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
  						  const cpp_hashnode *));
  
+ /* Evaluate a CPP_CHAR or CPP_WCHAR token.  */
+ extern HOST_WIDE_INT
+ cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
+ 				 int, int, unsigned int *));
+ 
  extern void cpp_define PARAMS ((cpp_reader *, const char *));
  extern void cpp_assert PARAMS ((cpp_reader *, const char *));
  extern void cpp_undef  PARAMS ((cpp_reader *, const char *));


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]