This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[RFA] Charconsts, take 3


This incorporates Matt's bugfix, Fergus' cleanup, and an
update to Makefile.in.  Once this patch goes in, I'll write
some testcases.

OK to commit?

Neil.

	* c-lex.c (lex_charconst): Convert into a simple wrapper
	around cpp_interpret_charconst, to which most of the code
	body is moved.
	* cppexp.c (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE,
	MAX_LONG_TYPE_SIZE, MAX_INT_TYPE_SIZE, MAX_CHAR_TYPE_MASK,
	MAX_WCHAR_TYPE_MASK, parse_escape, parse_charconst): Remove.
	(lex): Use cpp_interpret_charconst.
	* cpplex.c (parse_escape, read_ucs, cpp_interpret_charconst,
	hex_digit_value): New functions.
	(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): New macros.
	* cpplib.h (cpp_interpret_charconst): New prototype.
	* Makefile.in (cpplex.o): Depend on mbchar.h.

Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.617
diff -u -p -r1.617 Makefile.in
--- Makefile.in	2001/03/06 09:52:28	1.617
+++ Makefile.in	2001/03/07 08:23:51
@@ -1917,7 +1917,7 @@ cppmain.o:  cppmain.c  $(CONFIG_H) cppli
 
 cpperror.o: cpperror.c $(CONFIG_H) $(LIBCPP_DEPS)
 cppexp.o:   cppexp.c   $(CONFIG_H) $(LIBCPP_DEPS)
-cpplex.o:   cpplex.c   $(CONFIG_H) $(LIBCPP_DEPS)
+cpplex.o:   cpplex.c   $(CONFIG_H) $(LIBCPP_DEPS) mbchar.h
 cppmacro.o: cppmacro.c $(CONFIG_H) $(LIBCPP_DEPS)
 cpplib.o:   cpplib.c   $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
 cpphash.o:  cpphash.c  $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
Index: c-lex.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-lex.c,v
retrieving revision 1.133
diff -u -p -r1.133 c-lex.c
--- c-lex.c	2001/03/07 01:31:59	1.133
+++ c-lex.c	2001/03/07 08:23:52
@@ -86,7 +86,7 @@ static const char *read_ucs 	PARAMS ((co
 static void parse_float		PARAMS ((PTR));
 static tree lex_number		PARAMS ((const char *, unsigned int));
 static tree lex_string		PARAMS ((const char *, unsigned int, int));
-static tree lex_charconst	PARAMS ((const char *, unsigned int, int));
+static tree lex_charconst	PARAMS ((const cpp_token *));
 static void update_header_times	PARAMS ((const char *));
 static int dump_one_header	PARAMS ((splay_tree_node, void *));
 static void cb_ident		PARAMS ((cpp_reader *, const cpp_string *));
@@ -1008,8 +1008,7 @@ c_lex (value)
 
     case CPP_CHAR:
     case CPP_WCHAR:
-      *value = lex_charconst ((const char *)tok.val.str.text,
-			      tok.val.str.len, tok.type == CPP_WCHAR);
+      *value = lex_charconst (&tok);
       break;
 
     case CPP_STRING:
@@ -1608,111 +1607,33 @@ lex_string (str, len, wide)
 }
 
 static tree
-lex_charconst (str, len, wide)
-     const char *str;
-     unsigned int len;
-     int wide;
+lex_charconst (token)
+     const cpp_token *token;
 {
-  const char *limit = str + len;
-  int result = 0;
-  int num_chars = 0;
-  int chars_seen = 0;
-  unsigned width = TYPE_PRECISION (char_type_node);
-  int max_chars;
-  unsigned int c;
+  HOST_WIDE_INT result;
   tree value;
+  unsigned int chars_seen;
 
-#ifdef MULTIBYTE_CHARS
-  int longest_char = local_mb_cur_max ();
-  (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
-#endif
-
-  max_chars = TYPE_PRECISION (integer_type_node) / width;
-  if (wide)
-    width = WCHAR_TYPE_SIZE;
-
-  while (str < limit)
+  result = cpp_interpret_charconst (parse_in, token, warn_multichar,
+				    flag_traditional, &chars_seen);
+  if (token->type == CPP_WCHAR)
     {
-#ifdef MULTIBYTE_CHARS
-      wchar_t wc;
-      int char_len;
-
-      char_len = local_mbtowc (&wc, str, limit - str);
-      if (char_len == -1)
-	{
-	  warning ("Ignoring invalid multibyte character");
-	  char_len = 1;
-	  c = *str++;
-	}
-      else
-	{
-	  str += char_len;
-	  c = wc;
-	}
-#else
-      c = *str++;
-#endif
-
-      ++chars_seen;
-      if (c == '\\')
-	{
-	  str = readescape (str, limit, &c);
-	  if (width < HOST_BITS_PER_INT
-	      && (unsigned) c >= ((unsigned)1 << width))
-	    pedwarn ("escape sequence out of range for character");
-	}
-#ifdef MAP_CHARACTER
-      if (ISPRINT (c))
-	c = MAP_CHARACTER (c);
-#endif
-      
-      /* Merge character into result; ignore excess chars.  */
-      num_chars += (width / TYPE_PRECISION (char_type_node));
-      if (num_chars < max_chars + 1)
-	{
-	  if (width < HOST_BITS_PER_INT)
-	    result = (result << width) | (c & ((1 << width) - 1));
-	  else
-	    result = c;
-	}
-    }
-
-  if (chars_seen == 0)
-    error ("empty character constant");
-  else if (num_chars > max_chars)
-    {
-      num_chars = max_chars;
-      error ("character constant too long");
+      value = build_int_2 (result, 0);
+      TREE_TYPE (value) = wchar_type_node;
     }
-  else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
-    warning ("multi-character character constant");
-
-  /* If char type is signed, sign-extend the constant.  */
-  if (! wide)
+  else
     {
-      int num_bits = num_chars * width;
-      if (num_bits == 0)
-	/* We already got an error; avoid invalid shift.  */
-	value = build_int_2 (0, 0);
-      else if (TREE_UNSIGNED (char_type_node)
-	       || ((result >> (num_bits - 1)) & 1) == 0)
-	value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
-				       >> (HOST_BITS_PER_WIDE_INT - num_bits)),
-			     0);
+      if (result < 0)
+	value = build_int_2 (result, -1);
       else
-	value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
-					>> (HOST_BITS_PER_WIDE_INT - num_bits)),
-			     -1);
-      /* In C, a character constant has type 'int'; in C++, 'char'.  */
-      if (chars_seen <= 1 && c_language == clk_cplusplus)
+	value = build_int_2 (result, 0);
+
+      /* In C, a character constant has type 'int'.
+	 In C++ 'char', but multi-char charconsts have type 'int'.  */
+      if (c_language == clk_cplusplus && chars_seen <= 1)
 	TREE_TYPE (value) = char_type_node;
       else
 	TREE_TYPE (value) = integer_type_node;
-    }
-  else
-    {
-      value = build_int_2 (result, 0);
-      TREE_TYPE (value) = wchar_type_node;
     }
 
   return value;
Index: cppexp.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppexp.c,v
retrieving revision 1.90
diff -u -p -r1.90 cppexp.c
--- cppexp.c	2001/03/03 11:32:31	1.90
+++ cppexp.c	2001/03/07 08:23:54
@@ -18,37 +18,11 @@ along with this program; if not, write t
 Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
-/* Parse a C expression from text in a string  */
-   
 #include "config.h"
 #include "system.h"
 #include "cpplib.h"
 #include "cpphash.h"
 
-#ifndef MAX_CHAR_TYPE_SIZE
-#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
-#endif
-
-#ifndef MAX_INT_TYPE_SIZE
-#define MAX_INT_TYPE_SIZE INT_TYPE_SIZE
-#endif
-
-#ifndef MAX_LONG_TYPE_SIZE
-#define MAX_LONG_TYPE_SIZE LONG_TYPE_SIZE
-#endif
-
-#ifndef MAX_WCHAR_TYPE_SIZE
-#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
-#endif
-
-#define MAX_CHAR_TYPE_MASK (MAX_CHAR_TYPE_SIZE < HOST_BITS_PER_WIDEST_INT \
-		    ? (~(~(HOST_WIDEST_INT) 0 << MAX_CHAR_TYPE_SIZE)) \
-		    : ~ (HOST_WIDEST_INT) 0)
-
-#define MAX_WCHAR_TYPE_MASK (MAX_WCHAR_TYPE_SIZE < HOST_BITS_PER_WIDEST_INT \
-			     ? ~(~(HOST_WIDEST_INT) 0 << MAX_WCHAR_TYPE_SIZE) \
-			     : ~ (HOST_WIDEST_INT) 0)
-
 /* Yield nonzero if adding two numbers with A's and B's signs can yield a
    number with SUM's sign, where A, B, and SUM are all C integers.  */
 #define possible_sum_sign(a, b, sum) ((((a) ^ (b)) | ~ ((a) ^ (sum))) < 0)
@@ -61,10 +35,7 @@ static HOST_WIDEST_INT right_shift PARAM
 					    unsigned int,
 					    unsigned HOST_WIDEST_INT));
 static struct op parse_number PARAMS ((cpp_reader *, const cpp_token *));
-static struct op parse_charconst PARAMS ((cpp_reader *, const cpp_token *));
 static struct op parse_defined PARAMS ((cpp_reader *));
-static HOST_WIDEST_INT parse_escape PARAMS ((cpp_reader *, const U_CHAR **,
-					     const U_CHAR *, HOST_WIDEST_INT));
 static struct op lex PARAMS ((cpp_reader *, int, cpp_token *));
 static const unsigned char *op_as_text PARAMS ((cpp_reader *, enum cpp_ttype));
 
@@ -238,81 +209,6 @@ parse_number (pfile, tok)
   return op;
 }
 
-/* Parse and convert a character constant for #if.  Understands backslash
-   escapes (\n, \031) and multibyte characters (if so configured).  */
-static struct op
-parse_charconst (pfile, tok)
-     cpp_reader *pfile;
-     const cpp_token *tok;
-{
-  struct op op;
-  HOST_WIDEST_INT result = 0;
-  int num_chars = 0;
-  int num_bits;
-  unsigned int width = MAX_CHAR_TYPE_SIZE;
-  HOST_WIDEST_INT mask = MAX_CHAR_TYPE_MASK;
-  int max_chars;
-  const U_CHAR *ptr = tok->val.str.text;
-  const U_CHAR *end = ptr + tok->val.str.len;
-
-  int c = -1;
-
-  if (tok->type == CPP_WCHAR)
-    width = MAX_WCHAR_TYPE_SIZE, mask = MAX_WCHAR_TYPE_MASK;
-  max_chars = MAX_LONG_TYPE_SIZE / width;
-
-  while (ptr < end)
-    {
-      c = *ptr++;
-      if (c == '\'')
-	CPP_ICE ("unescaped ' in character constant");
-      else if (c == '\\')
-	{
-	  c = parse_escape (pfile, &ptr, end, mask);
-	  if (width < HOST_BITS_PER_INT
-	      && (unsigned int) c >= (unsigned int)(1 << width))
-	    cpp_pedwarn (pfile,
-			 "escape sequence out of range for character");
-	}
-	  
-      /* Merge character into result; ignore excess chars.  */
-      if (++num_chars <= max_chars)
-	{
-	  if (width < HOST_BITS_PER_INT)
-	    result = (result << width) | (c & ((1 << width) - 1));
-	  else
-	    result = c;
-	}
-    }
-
-  if (num_chars == 0)
-    SYNTAX_ERROR ("empty character constant");
-  else if (num_chars > max_chars)
-    SYNTAX_ERROR ("character constant too long");
-  else if (num_chars != 1)
-    cpp_warning (pfile, "multi-character character constant");
-
-  /* If char type is signed, sign-extend the constant.  */
-  num_bits = num_chars * width;
-      
-  if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
-      || ((result >> (num_bits - 1)) & 1) == 0)
-    op.value = result & ((unsigned HOST_WIDEST_INT) ~0
-			 >> (HOST_BITS_PER_WIDEST_INT - num_bits));
-  else
-    op.value = result | ~((unsigned HOST_WIDEST_INT) ~0
-			  >> (HOST_BITS_PER_WIDEST_INT - num_bits));
-
-  /* This is always a signed type.  */
-  op.unsignedp = 0;
-  op.op = CPP_INT;
-  return op;
-
- syntax_error:
-  op.op = CPP_ERROR;
-  return op;
-}
-
 static struct op
 parse_defined (pfile)
      cpp_reader *pfile;
@@ -405,7 +301,15 @@ lex (pfile, skip_evaluation, token)
 
     case CPP_CHAR:
     case CPP_WCHAR:
-      return parse_charconst (pfile, token);
+      {
+	unsigned int chars_seen;
+
+	/* This is always a signed type.  */
+	op.unsignedp = 0;
+	op.op = CPP_INT;
+	op.value = cpp_interpret_charconst (pfile, token, 1, 0, &chars_seen);
+	return op;
+      }
 
     case CPP_STRING:
     case CPP_WSTRING:
@@ -492,102 +396,6 @@ lex (pfile, skip_evaluation, token)
  syntax_error:
   op.op = CPP_ERROR;
   return op;
-}
-
-/* Parse a C escape sequence.  STRING_PTR points to a variable
-   containing a pointer to the string to parse.  That pointer
-   is updated past the characters we use.  The value of the
-   escape sequence is returned.
-
-   If \ is followed by 000, we return 0 and leave the string pointer
-   after the zeros.  A value of 0 does not mean end of string.  */
-
-static HOST_WIDEST_INT
-parse_escape (pfile, string_ptr, limit, result_mask)
-     cpp_reader *pfile;
-     const U_CHAR **string_ptr;
-     const U_CHAR *limit;
-     HOST_WIDEST_INT result_mask;
-{
-  const U_CHAR *ptr = *string_ptr;
-  /* We know we have at least one following character.  */
-  int c = *ptr++;
-  switch (c)
-    {
-    case 'a': c = TARGET_BELL;	  break;
-    case 'b': c = TARGET_BS;	  break;
-    case 'f': c = TARGET_FF;	  break;
-    case 'n': c = TARGET_NEWLINE; break;
-    case 'r': c = TARGET_CR;	  break;
-    case 't': c = TARGET_TAB;	  break;
-    case 'v': c = TARGET_VT;	  break;
-
-    case 'e': case 'E':
-      if (CPP_PEDANTIC (pfile))
-	cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
-      c = TARGET_ESC;
-      break;
-      
-    case '0': case '1': case '2': case '3':
-    case '4': case '5': case '6': case '7':
-      {
-	unsigned int i = c - '0';
-	int count = 0;
-	while (++count < 3)
-	  {
-	    if (ptr >= limit)
-	      break;
-	    
-	    c = *ptr;
-	    if (c < '0' || c > '7')
-	      break;
-	    ptr++;
-	    i = (i << 3) + c - '0';
-	  }
-	if (i != (i & result_mask))
-	  {
-	    i &= result_mask;
-	    cpp_pedwarn (pfile, "octal escape sequence out of range");
-	  }
-	c = i;
-	break;
-      }
-
-    case 'x':
-      {
-	unsigned int i = 0, overflow = 0;
-	int digits_found = 0, digit;
-	for (;;)
-	  {
-	    if (ptr >= limit)
-	      break;
-	    c = *ptr;
-	    if (c >= '0' && c <= '9')
-	      digit = c - '0';
-	    else if (c >= 'a' && c <= 'f')
-	      digit = c - 'a' + 10;
-	    else if (c >= 'A' && c <= 'F')
-	      digit = c - 'A' + 10;
-	    else
-	      break;
-	    ptr++;
-	    overflow |= i ^ (i << 4 >> 4);
-	    i = (i << 4) + digit;
-	    digits_found = 1;
-	  }
-	if (!digits_found)
-	  cpp_error (pfile, "\\x used with no following hex digits");
-	if (overflow | (i != (i & result_mask)))
-	  {
-	    i &= result_mask;
-	    cpp_pedwarn (pfile, "hex escape sequence out of range");
-	  }
-	c = i;
-	break;
-      }
-    }
-  *string_ptr = ptr;
-  return c;
 }
 
 static void
Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cpplex.c,v
retrieving revision 1.135
diff -u -p -r1.135 cpplex.c
--- cpplex.c	2001/03/07 01:31:59	1.135
+++ cpplex.c	2001/03/07 08:24:04
@@ -40,6 +40,18 @@ Foundation, 59 Temple Place - Suite 330,
 #include "cpphash.h"
 #include "symcat.h"
 
+/* MULTIBYTE_CHARS support only works for native compilers.
+   ??? Ideally what we want is to model widechar support after
+   the current floating point support.  */
+#ifdef CROSS_COMPILE
+#undef MULTIBYTE_CHARS
+#endif
+
+#ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
+#include <locale.h>
+#endif
+
 /* Tokens with SPELL_STRING store their spelling in the token list,
    and it's length in the token->val.name.len.  */
 enum spell_type
@@ -87,9 +99,15 @@ static void save_comment PARAMS ((cpp_re
 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
+static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
+					  const unsigned char *, HOST_WIDE_INT,
+					  int));
+static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
+				      const unsigned char *, unsigned int));
 
 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
+static unsigned int hex_digit_value PARAMS ((unsigned int));
 
 /* Utility routine:
 
@@ -1639,6 +1657,331 @@ cpp_output_line (pfile, fp)
     }
 
   putc ('\n', fp);
+}
+
+/* Returns the value of a hexadecimal digit.  */
+static unsigned int
+hex_digit_value (c)
+     unsigned int c;
+{
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  abort ();
+}
+
+/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
+
+   [lex.charset]: The character designated by the universal character
+   name \UNNNNNNNN is that character whose character short name in
+   ISO/IEC 10646 is NNNNNNNN; the character designated by the
+   universal character name \uNNNN is that character whose character
+   short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
+   for a universal character name is less than 0x20 or in the range
+   0x7F-0x9F (inclusive), or if the universal character name
+   designates a character in the basic source character set, then the
+   program is ill-formed.
+
+   We assume that wchar_t is Unicode, so we don't need to do any
+   mapping.  Is this ever wrong?  */
+
+static unsigned int
+read_ucs (pfile, pstr, limit, length)
+     cpp_reader *pfile;
+     const unsigned char **pstr;
+     const unsigned char *limit;
+     unsigned int length;
+{
+  const unsigned char *p = *pstr;
+  unsigned int c, code = 0;
+
+  for (; length; --length)
+    {
+      if (p >= limit)
+	{
+	  cpp_error (pfile, "incomplete universal-character-name");
+	  break;
+	}
+
+      c = *p;
+      if (ISXDIGIT (c))
+	{
+	  code = (code << 4) + hex_digit_value (c);
+	  p++;
+	}
+      else
+	{
+	  cpp_error (pfile,
+		     "non-hex digit '%c' in universal-character-name", c);
+	  break;
+	}
+
+    }
+
+#ifdef TARGET_EBCDIC
+  cpp_error (pfile, "universal-character-name on EBCDIC target");
+  code = 0x3f;  /* EBCDIC invalid character */
+#else
+  if (code > 0x9f && !(code & 0x80000000))
+    ; /* True extended character, OK.  */
+  else if (code >= 0x20 && code < 0x7f)
+    {
+      /* ASCII printable character.  The C character set consists of all of
+	 these except $, @ and `.  We use hex constants so that this also
+	 works with EBCDIC hosts.  */
+      if (code != 0x24 && code != 0x40 && code != 0x60)
+	cpp_error (pfile, "universal-character-name used for '%c'", code);
+    }
+  else
+    cpp_error (pfile, "invalid universal-character-name");
+#endif
+
+  *pstr = p;
+  return code;
+}
+
+/* Interpret an escape sequence, and return its value.  PSTR points to
+   the input pointer, which is just after the backslash.  LIMIT points
+   just past the end of the text.  MASK is the bit mask for the target
+   type (char or wchar_t).  If TRADITIONAL, '\a' and '\x' are not escapes.  */
+
+static unsigned int
+parse_escape (pfile, pstr, limit, mask, traditional)
+     cpp_reader *pfile;
+     const unsigned char **pstr;
+     const unsigned char *limit;
+     HOST_WIDE_INT mask;
+     int traditional;
+{
+  int unknown = 0;
+  const unsigned char *str = *pstr;
+  unsigned int c = *str++;
+
+  switch (c)
+    {
+    case '\\': case '\'': case '"': case '?': break;
+    case 'b': c = TARGET_BS;	  break;
+    case 'f': c = TARGET_FF;	  break;
+    case 'n': c = TARGET_NEWLINE; break;
+    case 'r': c = TARGET_CR;	  break;
+    case 't': c = TARGET_TAB;	  break;
+    case 'v': c = TARGET_VT;	  break;
+
+    case '(': case '{': case '[': case '%':
+      /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
+	 '\%' is used to prevent SCCS from getting confused.  */
+      unknown = CPP_PEDANTIC (pfile);
+      break;
+
+    case 'a':
+      if (CPP_WTRADITIONAL (pfile))
+	cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
+      if (!traditional)
+	c = TARGET_BELL;
+      break;
+
+    case 'e': case 'E':
+      if (CPP_PEDANTIC (pfile))
+	cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
+      c = TARGET_ESC;
+      break;
+      
+      /* The digits and resulting code are validated by read_ucs().  */
+    case 'u': case 'U':
+      if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
+	{
+	  if (CPP_WTRADITIONAL (pfile))
+	    cpp_warning (pfile,
+			 "the meaning of '\\%c' varies with -traditional", c);
+	  c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
+	}
+      else
+	unknown = 1;
+      break;
+
+    case 'x':
+      if (CPP_WTRADITIONAL (pfile))
+	cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
+
+      if (!traditional)
+	{
+	  unsigned int i = 0, overflow = 0;
+	  int digits_found = 0;
+
+	  while (str < limit)
+	    {
+	      c = *str;
+	      if (! ISXDIGIT (c))
+		break;
+	      str++;
+	      overflow |= i ^ (i << 4 >> 4);
+	      i = (i << 4) + hex_digit_value (c);
+	      digits_found = 1;
+	    }
+
+	  if (!digits_found)
+	    cpp_error (pfile, "\\x used with no following hex digits");
+
+	  if (overflow | (i != (i & mask)))
+	    {
+	      cpp_pedwarn (pfile, "hex escape sequence out of range");
+	      i &= mask;
+	    }
+	  c = i;
+	}
+      break;
+
+    case '0':  case '1':  case '2':  case '3':
+    case '4':  case '5':  case '6':  case '7':
+      {
+	unsigned int i = c - '0';
+	int count = 0;
+
+	while (str < limit && ++count < 3)
+	  {
+	    c = *str;
+	    if (c < '0' || c > '7')
+	      break;
+	    str++;
+	    i = (i << 3) + c - '0';
+	  }
+
+	if (i != (i & mask))
+	  {
+	    cpp_pedwarn (pfile, "octal escape sequence out of range");
+	    i &= mask;
+	  }
+	c = i;
+      }
+      break;
+
+    default:
+      unknown = 1;
+      break;
+    }
+
+  if (unknown)
+    {
+      if (ISGRAPH (c))
+	cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
+      else
+	cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
+    }
+
+  *pstr = str;
+  return c;
+}
+
+#ifndef MAX_CHAR_TYPE_SIZE
+#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
+#endif
+
+#ifndef MAX_WCHAR_TYPE_SIZE
+#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
+#endif
+
+HOST_WIDE_INT
+cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
+     cpp_reader *pfile;
+     const cpp_token *token;
+     int warn_multi;
+     int traditional;
+     unsigned int *pchars_seen;
+{
+  const unsigned char *str = token->val.str.text;
+  const unsigned char *limit = str + token->val.str.len;
+  unsigned int chars_seen = 0;
+  unsigned int width, max_chars, c;
+  HOST_WIDE_INT result = 0, mask;
+
+#ifdef MULTIBYTE_CHARS
+  (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
+#endif
+
+  /* Width in bits.  */
+  if (token->type == CPP_CHAR)
+    width = MAX_CHAR_TYPE_SIZE;
+  else
+    width = MAX_WCHAR_TYPE_SIZE;
+
+  if (width < HOST_BITS_PER_WIDE_INT)
+    mask = ((unsigned) 1 << width) - 1;
+  else
+    mask = ~0;
+  max_chars = HOST_BITS_PER_WIDE_INT / width;
+
+  while (str < limit)
+    {
+#ifdef MULTIBYTE_CHARS
+      wchar_t wc;
+      int char_len;
+
+      char_len = local_mbtowc (&wc, str, limit - str);
+      if (char_len == -1)
+	{
+	  cpp_warning (pfile, "ignoring invalid multibyte character");
+	  c = *str++;
+	}
+      else
+	{
+	  str += char_len;
+	  c = wc;
+	}
+#else
+      c = *str++;
+#endif
+
+      if (c == '\\')
+	{
+	  c = parse_escape (pfile, &str, limit, mask, traditional);
+	  if (width < HOST_BITS_PER_WIDE_INT && c >= ((unsigned) 1 << width))
+	    cpp_pedwarn (pfile, "escape sequence out of range for character");
+	}
+
+#ifdef MAP_CHARACTER
+      if (ISPRINT (c))
+	c = MAP_CHARACTER (c);
+#endif
+      
+      /* Merge character into result; ignore excess chars.  */
+      if (++chars_seen <= max_chars)
+	{
+	  if (width < HOST_BITS_PER_WIDE_INT)
+	    result = (result << width) | (c & ((1 << width) - 1));
+	  else
+	    result = c;
+	}
+    }
+
+  if (chars_seen == 0)
+    cpp_error (pfile, "empty character constant");
+  else if (chars_seen > max_chars)
+    {
+      chars_seen = max_chars;
+      cpp_error (pfile, "character constant too long");
+    }
+  else if (chars_seen > 1 && !traditional && warn_multi)
+    cpp_warning (pfile, "multi-character character constant");
+
+  /* If char type is signed, sign-extend the constant.  The
+     __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
+  if (token->type == CPP_CHAR && chars_seen)
+    {
+      unsigned int nbits = chars_seen * width;
+      unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
+
+      if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
+	  || ((result >> (nbits - 1)) & 1) == 0)
+	result &= mask;
+      else
+	result |= ~mask;
+    }
+
+  *pchars_seen = chars_seen;
+  return result;
 }
 
 /* Memory pools.  */
Index: cpplib.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cpplib.h,v
retrieving revision 1.169
diff -u -p -r1.169 cpplib.h
--- cpplib.h	2001/03/07 01:32:00	1.169
+++ cpplib.h	2001/03/07 08:24:04
@@ -542,6 +542,11 @@ extern const cpp_lexer_pos *cpp_get_line
 extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
 						  const cpp_hashnode *));
 
+/* Evaluate a CPP_CHAR or CPP_WCHAR token.  */
+extern HOST_WIDE_INT
+cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
+				 int, int, unsigned int *));
+
 extern void cpp_define PARAMS ((cpp_reader *, const char *));
 extern void cpp_assert PARAMS ((cpp_reader *, const char *));
 extern void cpp_undef  PARAMS ((cpp_reader *, const char *));


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]