Nulls in files

Neil Booth NeilB@earthling.net
Tue Apr 4 08:18:00 GMT 2000


This is take 2.  Nulls are basically treated as whitespace: wherever
whitespace was syntactically a delimiter before, a null now has the
same effect, and it is treated as part of any whitespace around it.  If
this is considered acceptable, I'll test properly, bundle a testsuite
case and update the docs for cpp if needed.

o nulls in comments are silently ignored

o nulls in strings produce one warning per string, and are preserved

o nulls elsewhere produce one warning per block of contiguous
whitespace; the nulls and the surrounding whitespace are returned as
a single whitespace token, with the nulls themselves dropped from it.

Neil.

	* cppinit.c (ISTABLE): Null character handled as whitespace.
	* cpplex.c (null_warning): New function.
	(_cpp_skip_hspace): Emit warning if nulls encountered.
	(parse_string): Emit warning if nulls encountered.
	Preserve them.
	(_cpp_lex_token): Emit warning if nulls encountered.  Drop
	them.

Index: cppinit.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cppinit.c,v
retrieving revision 1.71
diff -u -p -r1.71 cppinit.c
--- cppinit.c	2000/04/02 22:50:54	1.71
+++ cppinit.c	2000/04/04 14:58:27
@@ -265,7 +265,7 @@ ISTABLE
 
   N('1') N('2') N('3') N('4') N('5') N('6') N('7') N('8') N('9') N('0')
 
-  H(' ') H('\t') H('\v') H('\f')
+  H('\0') H(' ') H('\t') H('\v') H('\f')
 
   S('\n')
 END
Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplex.c,v
retrieving revision 1.6
diff -u -p -r1.6 cpplex.c
--- cpplex.c	2000/04/02 22:50:54	1.6
+++ cpplex.c	2000/04/04 14:58:29
@@ -40,6 +40,7 @@ static void skip_string		PARAMS ((cpp_re
 static void parse_string	PARAMS ((cpp_reader *, int));
 static U_CHAR *find_position	PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
 static int null_cleanup		PARAMS ((cpp_buffer *, cpp_reader *));
+static void null_warning        PARAMS ((cpp_reader *, unsigned int));
 
 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 
@@ -376,23 +377,38 @@ copy_comment (pfile, m)
   return ' ';
 }
 
+static void
+null_warning (pfile, count)
+     cpp_reader *pfile;
+     unsigned int count;
+{
+  if (count == 1)
+    cpp_warning (pfile, "embedded null character ignored");
+  else
+    cpp_warning (pfile, "embedded null characters ignored");
+}
+
 /* Skip whitespace \-newline and comments.  Does not macro-expand.  */
 
 void
 _cpp_skip_hspace (pfile)
      cpp_reader *pfile;
 {
+  unsigned int null_count = 0;
   int c;
+
   while (1)
     {
       c = GETC();
       if (c == EOF)
-	return;
+	goto out;
       else if (is_hspace(c))
 	{
 	  if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
 	    cpp_pedwarn (pfile, "%s in preprocessing directive",
 			 c == '\f' ? "formfeed" : "vertical tab");
+	  else if (c == '\0')
+	    null_count++;
 	}
       else if (c == '\r')
 	{
@@ -418,6 +434,9 @@ _cpp_skip_hspace (pfile)
 	break;
     }
   FORWARD(-1);
+ out:
+  if (null_count)
+    null_warning (pfile, null_count);
 }
 
 /* Read and discard the rest of the current line.  */
@@ -580,6 +599,7 @@ parse_string (pfile, c)
 {
   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
   const U_CHAR *limit;
+  unsigned int null_count = 0;
 
   skip_string (pfile, c);
 
@@ -587,8 +607,19 @@ parse_string (pfile, c)
   CPP_RESERVE (pfile, limit - start + 2);
   CPP_PUTC_Q (pfile, c);
   for (; start < limit; start++)
-    if (*start != '\r')
-      CPP_PUTC_Q (pfile, *start);
+    {
+      char c = *start;
+
+      if (c == '\r')
+	continue;
+      CPP_PUTC_Q (pfile, c);
+      if (c == '\0')
+	null_count++;
+    }
+  if (null_count == 1)
+    cpp_warning (pfile, "embedded null character in string");
+  else if (null_count > 1)
+    cpp_warning (pfile, "embedded null characters in string");
 }
 
 /* Read an assertion into the token buffer, converting to
@@ -974,16 +1005,26 @@ _cpp_lex_token (pfile)
     _cpp_parse_name (pfile, c);
     return CPP_MACRO;
 
-    case ' ':  case '\t':  case '\v':
-      for (;;)
-	{
-	  CPP_PUTC (pfile, c);
-	  c = PEEKC ();
-	  if (c == EOF || !is_hspace(c))
-	    break;
-	  FORWARD(1);
-	}
-      return CPP_HSPACE;
+
+    case ' ':  case '\t':  case '\v': case '\0':
+      {
+	int null_count = 0;
+
+	for (;;)
+	  {
+	    if (c == '\0')
+	      null_count++;
+	    else
+	      CPP_PUTC (pfile, c);
+	    c = PEEKC ();
+	    if (c == EOF || !is_hspace(c))
+	      break;
+	    FORWARD(1);
+	  }
+	if (null_count)
+	  null_warning (pfile, null_count);
+	return CPP_HSPACE;
+      }
 
     case '\r':
       if (CPP_BUFFER (pfile)->has_escapes)


More information about the Gcc-patches mailing list