This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Re: cpplib: free-standing tokens

To: gcc-patches at gcc dot gnu dot org
Subject: Re: cpplib: free-standing tokens
From: Neil Booth <NeilB at earthling dot net>
Date: Sun, 14 May 2000 23:27:45 +0900
References: <E12qrwc-0007pq-00@monkey.rosenet.ne.jp>
[Apologies for the re-post; I screwed up the ASCII formatting for the
web site]

The previous patch didn't bootstrap, because the existing lexer still
uses the list's namebuf for all tokens, but the buffer expansion
routine would only fix up pointers for a restricted class of tokens.

This patch is the same as the previous patch, plus a change to the
existing lexer so that it uses the operator spelling table like the
new lexer does, rather than storing its own copy of the spelling in
the list's namebuf.  To eliminate warnings, a few variables and
function prototypes of the existing lexer were made more
const-correct.

The only difference between the two lexers now is that the current
lexer still uses the list's namebuf for CPP_OTHER tokens.  I had to
fudge a couple of lines in the new lexer to work around this.  If we
could get the current lexer to store CPP_OTHER characters in the AUX
member like the new lexer does, I could revert those fudges.  They're
only temporary for the transition anyway.

Bootstraps i686 linux.

Neil.

	* cpphash.c (trad_stringify, warn_trad_stringify,
	collect_params): Make some pointers pointers to const.
	* cpplex.c (auto_expand_name_space): Always expand by at
	least one character.
	(SPELL_CHAR, SPELL_NONE): Temporarily reverse order.
	(struct token_spelling): Use const U_CHAR * rather than PTR.
	(expand_name_space): Fix up token pointers if name space
	is moved when expanding.
	(INIT_NAME, cpp_scan_line, parse_name, parse_number,
	parse_string2, save_comment, spell_token, cpp_output_list):
	Update so the routines handle tokens with a direct pointer to
	their text, rather than an offset into the token's list's namebuf.

	* cpplib.c (_cpp_check_directive): Similarly.
	(do_define): Make SYM a pointer to const.
	* cpplib.h (struct cpp_name): Replace offset with direct pointer.
	(CPP_INT, CPP_FLOAT): Spelling type should be SPELL_IDENT.
	(TOK_OFFSET): Delete.
	(TOK_NAME): Update.


Index: cpphash.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpphash.c,v
retrieving revision 1.88
diff -u -p -r1.88 cpphash.c
--- cpphash.c	2000/05/11 08:43:55	1.88
+++ cpphash.c	2000/05/14 14:07:58
@@ -162,9 +162,10 @@ collect_funlike_expansion PARAMS ((cpp_r
 static unsigned int collect_params PARAMS ((cpp_reader *, cpp_toklist *,
 					    struct arglist *));
 
-static void warn_trad_stringify	PARAMS ((cpp_reader *, U_CHAR *, size_t,
+static void warn_trad_stringify	PARAMS ((cpp_reader *, const U_CHAR *, size_t,
 					 unsigned int, const struct arg *));
-static unsigned int trad_stringify PARAMS ((cpp_reader *, U_CHAR *, size_t,
+static unsigned int trad_stringify PARAMS ((cpp_reader *, const U_CHAR *,
+					    size_t,
 					    unsigned int, const struct arg *,
 					    struct reflist **,
 					    struct reflist **, unsigned int));
@@ -371,12 +372,12 @@ add_pat (pat, endpat, nchars, argno, raw
 static void
 warn_trad_stringify (pfile, p, len, argc, argv)
      cpp_reader *pfile;
-     U_CHAR *p;
+     const U_CHAR *p;
      size_t len;
      unsigned int argc;
      const struct arg *argv;
 {
-  U_CHAR *limit;
+  const U_CHAR *limit;
   unsigned int i;
 
   limit = p + len;
@@ -406,14 +407,14 @@ warn_trad_stringify (pfile, p, len, argc
 static unsigned int
 trad_stringify (pfile, base, len, argc, argv, pat, endpat, last)
      cpp_reader *pfile;
-     U_CHAR *base;
+     const U_CHAR *base;
      size_t len;
      unsigned int argc;
      const struct arg *argv;
      struct reflist **pat, **endpat;
      unsigned int last;
 {
-  U_CHAR *p, *limit;
+  const U_CHAR *p, *limit;
   unsigned int i;
 
   p = base;
@@ -548,7 +549,8 @@ collect_funlike_expansion (pfile, list, 
   int j, argc;
   size_t len;
   const struct arg *argv;
-  U_CHAR *tok, *exp;
+  const U_CHAR *tok;
+  U_CHAR *exp;
   enum { START = 0, NORM, ARG, STRIZE, PASTE } last_token = START;
 
   argv = arglist->argv;
@@ -728,7 +730,8 @@ collect_params (pfile, list, arglist)
      struct arglist *arglist;
 {
   struct arg *argv = 0;
-  U_CHAR *namebuf, *p, *tok;
+  const U_CHAR *tok;
+  U_CHAR *namebuf, *p;
   unsigned int len, argslen;
   unsigned int argc, a, i, j;
 
Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplex.c,v
retrieving revision 1.40
diff -u -p -r1.40 cpplex.c
--- cpplex.c	2000/05/10 09:39:18	1.40
+++ cpplex.c	2000/05/14 14:08:07
@@ -60,7 +60,7 @@ static void pedantic_whitespace	PARAMS (
 					 unsigned int));
 
 #define auto_expand_name_space(list) \
-    expand_name_space ((list), (list)->name_cap / 2)
+    expand_name_space ((list), 1 + (list)->name_cap / 2)
 
 #ifdef NEW_LEXER
 
@@ -85,15 +85,15 @@ void _cpp_lex_line PARAMS ((cpp_reader *
 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
 
 static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
-					    cpp_toklist *, unsigned char *,
-					    int));
+					    unsigned char *, int));
 
 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 					  cpp_token *));
 
 /* Macros on a cpp_name.  */
 #define INIT_NAME(list, name) \
-  do {(name).len = 0; (name).offset = (list)->name_used;} while (0)
+  do {(name).len = 0; \
+      (name).text = (list)->namebuf + (list)->name_used;} while (0)
 
 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
 #define COLUMN(cur) ((cur) - buffer->line_base)
@@ -114,15 +114,28 @@ typedef unsigned int (* speller) PARAMS 
 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 
+#define PUSH_TOKEN(ttype) cur_token++->type = ttype
+#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
+#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
+#define BACKUP_DIGRAPH(ttype) do { \
+  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
+
+/* An upper bound on the number of bytes needed to spell a token,
+   including preceding whitespace.  */
+#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
+		               SPELL_NONE ? token->val.name.len: 0))
+
+#endif
+
 /* Order here matters.  Those beyond SPELL_NONE store their spelling
    in the token list, and it's length in the token->val.name.len.  */
 #define SPELL_OPERATOR 0
-#define SPELL_CHAR     1
-#define SPELL_NONE     2
+#define SPELL_CHAR     2	/* FIXME: revert order after transition. */
+#define SPELL_NONE     1
 #define SPELL_IDENT    3
 #define SPELL_STRING   4
 
-#define T(e, s) {SPELL_OPERATOR, s},
+#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
 #define I(e, s) {SPELL_IDENT, s},
 #define S(e, s) {SPELL_STRING, s},
 #define C(e, s) {SPELL_CHAR, s},
@@ -130,8 +143,8 @@ typedef unsigned int (* speller) PARAMS 
 
 static const struct token_spelling
 {
-  unsigned char type;
-  PTR  speller;
+  U_CHAR type;
+  const U_CHAR *spelling;
 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
 
 #undef T
@@ -140,19 +153,6 @@ static const struct token_spelling
 #undef C
 #undef N
 
-#define PUSH_TOKEN(ttype) cur_token++->type = ttype
-#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
-#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
-#define BACKUP_DIGRAPH(ttype) do { \
-  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
-
-/* An upper bound on the number of bytes needed to spell a token,
-   including preceding whitespace.  */
-#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
-		               SPELL_NONE ? token->val.name.len: 0))
-
-#endif
-
 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 
 void
@@ -525,8 +525,23 @@ expand_name_space (list, len)
      cpp_toklist *list;
      unsigned int len;
 {
+  const U_CHAR *old_namebuf;
+  ptrdiff_t delta;
+
+  old_namebuf = list->namebuf;
   list->name_cap += len;
   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
+
+  /* Fix up token text pointers.  */
+  delta = list->namebuf - old_namebuf;
+  if (delta)
+    {
+      unsigned int i;
+
+      for (i = 0; i < list->tokens_used; i++)
+	if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
+	  list->tokens[i].val.name.text += delta;
+    }
 }
 
 /* Expand the number of tokens in a list.  */
@@ -632,9 +647,14 @@ _cpp_scan_line (pfile, list)
 	break;
 
       TOK_LEN (list, i) = len;
-      TOK_OFFSET (list, i) = list->name_used;
-      memcpy (TOK_NAME (list, i), CPP_PWRITTEN (pfile), len);
-      list->name_used += len;
+      if (token_spellings[type].type > SPELL_NONE)
+	{
+	  memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
+	  TOK_NAME (list, i) = list->namebuf + list->name_used;
+	  list->name_used += len;
+	}
+      else
+	TOK_NAME (list, i) = token_spellings[type].spelling;
       i++;
       space_before = 0;
     }
@@ -2563,7 +2583,7 @@ parse_name (pfile, list, name)
 
  out:
   buffer->cur = cur;
-  name->len = namebuf - (list->namebuf + name->offset);
+  name->len = namebuf - name->text;
   list->name_used = namebuf - list->namebuf;
 }
 
@@ -2613,7 +2633,7 @@ parse_number (pfile, list, name)
   
  out:
   buffer->cur = cur;
-  name->len = namebuf - (list->namebuf + name->offset);
+  name->len = namebuf - name->text;
   list->name_used = namebuf - list->namebuf;
 }
 
@@ -2651,8 +2671,6 @@ parse_string2 (pfile, list, name, termin
 	null_count++;
       else if (c == terminator || IS_NEWLINE (c))
 	{
-	  unsigned char* name_start = list->namebuf + name->offset;
-
 	  /* Needed for trigraph_replace and multiline string warning.  */
 	  buffer->cur = cur;
 
@@ -2660,9 +2678,9 @@ parse_string2 (pfile, list, name, termin
 	  if (CPP_OPTION (pfile, trigraphs)
 	      || CPP_OPTION (pfile, warn_trigraphs))
 	    {
-	      namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
+	      namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
 					    namebuf);
-	      trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
+	      trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
 	      if (trigraphed_len < 0)
 		trigraphed_len = 0;
 	    }
@@ -2714,7 +2732,7 @@ parse_string2 (pfile, list, name, termin
 	      /* An odd number of consecutive backslashes represents
 		 an escaped terminator.  */
 	      temp = namebuf - 1;
-	      while (temp >= name_start && *temp == '\\')
+	      while (temp >= name->text && *temp == '\\')
 		temp--;
 
 	      if ((namebuf - temp) & 1)
@@ -2751,7 +2769,7 @@ parse_string2 (pfile, list, name, termin
   
  out:
   buffer->cur = cur;
-  name->len = namebuf - (list->namebuf + name->offset);
+  name->len = namebuf - name->text;
   list->name_used = namebuf - list->namebuf;
 
   if (null_count > 0)
@@ -2783,13 +2801,14 @@ save_comment (list, from, len, tok_no, t
   if (list->name_used + len > list->name_cap)
     expand_name_space (list, len);
 
+  buffer = list->namebuf + list->name_used;
+
   comment = &list->comments[list->comments_used++];
   comment->type = CPP_COMMENT;
   comment->aux = tok_no;
   comment->val.name.len = len;
-  comment->val.name.offset = list->name_used;
+  comment->val.name.text = buffer;
 
-  buffer = list->namebuf + list->name_used;
   if (type == '*')
     {
       *buffer++ = '/';
@@ -2863,20 +2882,20 @@ _cpp_lex_line (pfile, list)
 	{
 	case '0': case '1': case '2': case '3': case '4':
 	case '5': case '6': case '7': case '8': case '9':
-	  /* Prepend an immediately previous CPP_DOT token.  */
+	  cur--;		/* Backup character.  */
 	  if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
 	    {
+	      /* Prepend an immediately previous CPP_DOT token.  */
 	      cur_token--;
 	      if (list->name_cap == list->name_used)
 		auto_expand_name_space (list);
 
 	      cur_token->val.name.len = 1;
-	      cur_token->val.name.offset = list->name_used;
+	      cur_token->val.name.text = list->namebuf + list->name_used;
 	      list->namebuf[list->name_used++] = '.';
 	    }
 	  else
 	    INIT_NAME (list, cur_token->val.name);
-	  cur--;		/* Backup character.  */
 
 	continue_number:
 	  buffer->cur = cur;
@@ -2898,8 +2917,8 @@ _cpp_lex_line (pfile, list)
 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 	case 'Y': case 'Z':
-	  INIT_NAME (list, cur_token->val.name);
 	  cur--;		     /* Backup character.  */
+	  INIT_NAME (list, cur_token->val.name);
 	  cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
 
 	continue_name:
@@ -2920,7 +2939,7 @@ _cpp_lex_line (pfile, list)
 	  /* Do we have a wide string?  */
 	  if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
 	      && cur_token[-1].val.name.len == 1
-	      && *(list->namebuf + cur_token[-1].val.name.offset) == 'L'
+	      && cur_token[-1].val.name.text[0] == 'L'
 	      && !CPP_TRADITIONAL (pfile))
 	    {
 	      /* No need for 'L' any more.  */
@@ -2977,8 +2996,7 @@ _cpp_lex_line (pfile, list)
 
 		      /* Back-up to first '-' or '/'.  */
 		      cur_token -= 2;
-		      if (!CPP_OPTION (pfile, traditional))
-			flags = PREV_WHITESPACE;
+		      flags = PREV_WHITESPACE;
 		    }
 		}
 	    }
@@ -3006,9 +3024,10 @@ _cpp_lex_line (pfile, list)
 				 cur_token - 1 - list->tokens, c);
 		  cur = buffer->cur;
 
-		  cur_token -= 2;
+		  cur_token--;
 		  if (!CPP_OPTION (pfile, traditional))
 		    flags = PREV_WHITESPACE;
+		  break;
 		}
 	      else if (CPP_OPTION (pfile, cplusplus))
 		{
@@ -3249,6 +3268,7 @@ _cpp_lex_line (pfile, list)
 	  /* Fall through */
 	default:
 	  cur_token->aux = c;
+	  cur_token->val.name.len = 0; /* FIXME: needed for transition only */
 	  PUSH_TOKEN (CPP_OTHER);
 	  break;
 	}
@@ -3300,10 +3320,9 @@ _cpp_lex_line (pfile, list)
    to the character after the last character written.  */
 
 static unsigned char *
-spell_token (pfile, token, list, buffer, whitespace)
+spell_token (pfile, token, buffer, whitespace)
      cpp_reader *pfile;		/* Would be nice to be rid of this...  */
      cpp_token *token;
-     cpp_toklist *list;		/* FIXME: get rid of this...  */
      unsigned char *buffer;
      int whitespace;
 {
@@ -3323,7 +3342,7 @@ spell_token (pfile, token, list, buffer,
 	if (token->flags & DIGRAPH)
 	  spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
 	else
-	  spelling = token_spellings[token->type].speller;
+	  spelling = token_spellings[token->type].spelling;
 	
 	while ((c = *spelling++) != '\0')
 	  *buffer++ = c;
@@ -3331,8 +3350,7 @@ spell_token (pfile, token, list, buffer,
       break;
 
     case SPELL_IDENT:
-      memcpy (buffer, list->namebuf + token->val.name.offset,
-	      token->val.name.len);
+      memcpy (buffer, token->val.name.text, token->val.name.len);
       buffer += token->val.name.len;
       break;
 
@@ -3346,8 +3364,7 @@ spell_token (pfile, token, list, buffer,
 	if (token->type == CPP_STRING || token->type == CPP_WSTRING)
 	  c = '"';
 	*buffer++ = c;
-	memcpy (buffer, list->namebuf + token->val.name.offset,
-		token->val.name.len);
+	memcpy (buffer, token->val.name.text, token->val.name.len);
 	buffer += token->val.name.len;
 	*buffer++ = c;
       }
@@ -3420,7 +3437,7 @@ _cpp_output_list (pfile, list)
 	{
 	  /* Make space for the comment, and copy it out.  */
 	  CPP_RESERVE (pfile, TOKEN_LEN (comment));
-	  pfile->limit = spell_token (pfile, comment, list, pfile->limit, 0);
+	  pfile->limit = spell_token (pfile, comment, pfile->limit, 0);
 
 	  /* Stop if no comments left, or no more comments appear
              before the current token.  */
@@ -3431,7 +3448,7 @@ _cpp_output_list (pfile, list)
 	}
 
       CPP_RESERVE (pfile, TOKEN_LEN (token));
-      pfile->limit = spell_token (pfile, token, list, pfile->limit, 1);
+      pfile->limit = spell_token (pfile, token, pfile->limit, 1);
     }
   while (token++->type != CPP_VSPACE);
 }
Index: cpplib.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplib.c,v
retrieving revision 1.163
diff -u -p -r1.163 cpplib.c
--- cpplib.c	2000/05/04 04:38:00	1.163
+++ cpplib.c	2000/05/14 14:08:08
@@ -156,7 +156,7 @@ _cpp_check_directive (list, token)
      cpp_toklist *list;
      cpp_token *token;
 {
-  const U_CHAR *name = list->namebuf + token->val.name.offset;
+  const U_CHAR *name = token->val.name.text;
   size_t len = token->val.name.len;
   unsigned int i;
 
@@ -339,7 +339,7 @@ do_define (pfile)
 {
   HASHNODE *node;
   int len;
-  U_CHAR *sym;
+  const U_CHAR *sym;
   cpp_toklist *list = &pfile->directbuf;
 
   pfile->no_macro_expand++;
Index: cpplib.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplib.h,v
retrieving revision 1.91
diff -u -p -r1.91 cpplib.h
--- cpplib.h	2000/05/10 09:39:18	1.91
+++ cpplib.h	2000/05/14 14:08:08
@@ -110,8 +110,8 @@ typedef struct cpp_name cpp_name;
   C(CPP_OTHER,		0)	/* stray punctuation */ \
 \
   I(CPP_NAME,		0)	/* word */	\
-  N(CPP_INT,		0)	/* 23 */	\
-  N(CPP_FLOAT,		0)	/* 3.14159 */	\
+  I(CPP_INT,		0)	/* 23 */	\
+  I(CPP_FLOAT,		0)	/* 3.14159 */	\
   I(CPP_NUMBER,		0)	/* 34_be+ta  */	\
   S(CPP_CHAR,		0)	/* 'char' */	\
   S(CPP_WCHAR,		0)	/* L'char' */	\
@@ -151,7 +151,7 @@ enum cpp_ttype
 struct cpp_name
 {
   unsigned int len;
-  unsigned int offset;		/* from list->namebuf */
+  const unsigned char *text;
 };
 
 /* Accessor macros for token lists - all expect you have a
@@ -162,8 +162,7 @@ struct cpp_name
 #define TOK_AUX(l_, i_)    ((l_)->tokens[i_].aux)
 #define TOK_COL(l_, i_)    ((l_)->tokens[i_].col)
 #define TOK_INT(l_, i_)    ((l_)->tokens[i_].val.integer)
-#define TOK_OFFSET(l_, i_) ((l_)->tokens[i_].val.name.offset)
-#define TOK_NAME(l_, i_)   ((l_)->tokens[i_].val.name.offset + (l_)->namebuf)
+#define TOK_NAME(l_, i_)   ((l_)->tokens[i_].val.name.text)
 #define TOK_LEN(l_, i_)    ((l_)->tokens[i_].val.name.len)
 
 #define TOK_PREV_WHITE(l_, i_) (TOK_FLAGS(l_, i_) & PREV_WHITESPACE)
References:
- cpplib: free-standing tokens
  - From: Neil Booth
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]