Nulls in files
Neil Booth
NeilB@earthling.net
Tue Apr 4 08:18:00 GMT 2000
This is take 2. Nulls are basically treated as whitespace: wherever
whitespace was syntactically a delimiter before, a null now has the
same effect, and it is treated as part of any whitespace around it. If
this is considered acceptable, I'll test properly, bundle a testsuite
case and update the docs for cpp if needed.
o nulls in comments are silently ignored
o nulls in strings produce one warning per string, and are preserved
o nulls elsewhere produce one warning per block of contiguous
whitespace, and the nulls plus surrounding whitespace are returned as
a single whitespace token. The nulls themselves are dropped from the
token text.
Neil.
* cppinit.c (ISTABLE): Null character handled as whitespace.
* cpplex.c (null_warning): New function.
(_cpp_skip_hspace): Emit warning if nulls encountered.
(parse_string): Emit warning if nulls encountered.
Preserve them.
(_cpp_lex_token): Emit warning if nulls encountered. Drop
them.
Index: cppinit.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cppinit.c,v
retrieving revision 1.71
diff -u -p -r1.71 cppinit.c
--- cppinit.c 2000/04/02 22:50:54 1.71
+++ cppinit.c 2000/04/04 14:58:27
@@ -265,7 +265,7 @@ ISTABLE
N('1') N('2') N('3') N('4') N('5') N('6') N('7') N('8') N('9') N('0')
- H(' ') H('\t') H('\v') H('\f')
+ H('\0') H(' ') H('\t') H('\v') H('\f')
S('\n')
END
Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplex.c,v
retrieving revision 1.6
diff -u -p -r1.6 cpplex.c
--- cpplex.c 2000/04/02 22:50:54 1.6
+++ cpplex.c 2000/04/04 14:58:29
@@ -40,6 +40,7 @@ static void skip_string PARAMS ((cpp_re
static void parse_string PARAMS ((cpp_reader *, int));
static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
+static void null_warning PARAMS ((cpp_reader *, unsigned int));
/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
@@ -376,23 +377,38 @@ copy_comment (pfile, m)
return ' ';
}
+static void
+null_warning (pfile, count)
+ cpp_reader *pfile;
+ unsigned int count;
+{
+ if (count == 1)
+ cpp_warning (pfile, "embedded null character ignored");
+ else
+ cpp_warning (pfile, "embedded null characters ignored");
+}
+
/* Skip whitespace \-newline and comments. Does not macro-expand. */
void
_cpp_skip_hspace (pfile)
cpp_reader *pfile;
{
+ unsigned int null_count = 0;
int c;
+
while (1)
{
c = GETC();
if (c == EOF)
- return;
+ goto out;
else if (is_hspace(c))
{
if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
cpp_pedwarn (pfile, "%s in preprocessing directive",
c == '\f' ? "formfeed" : "vertical tab");
+ else if (c == '\0')
+ null_count++;
}
else if (c == '\r')
{
@@ -418,6 +434,9 @@ _cpp_skip_hspace (pfile)
break;
}
FORWARD(-1);
+ out:
+ if (null_count)
+ null_warning (pfile, null_count);
}
/* Read and discard the rest of the current line. */
@@ -580,6 +599,7 @@ parse_string (pfile, c)
{
const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
const U_CHAR *limit;
+ unsigned int null_count = 0;
skip_string (pfile, c);
@@ -587,8 +607,19 @@ parse_string (pfile, c)
CPP_RESERVE (pfile, limit - start + 2);
CPP_PUTC_Q (pfile, c);
for (; start < limit; start++)
- if (*start != '\r')
- CPP_PUTC_Q (pfile, *start);
+ {
+ char c = *start;
+
+ if (c == '\r')
+ continue;
+ CPP_PUTC_Q (pfile, c);
+ if (c == '\0')
+ null_count++;
+ }
+ if (null_count == 1)
+ cpp_warning (pfile, "embedded null character in string");
+ else if (null_count > 1)
+ cpp_warning (pfile, "embedded null characters in string");
}
/* Read an assertion into the token buffer, converting to
@@ -974,16 +1005,26 @@ _cpp_lex_token (pfile)
_cpp_parse_name (pfile, c);
return CPP_MACRO;
- case ' ': case '\t': case '\v':
- for (;;)
- {
- CPP_PUTC (pfile, c);
- c = PEEKC ();
- if (c == EOF || !is_hspace(c))
- break;
- FORWARD(1);
- }
- return CPP_HSPACE;
+
+ case ' ': case '\t': case '\v': case '\0':
+ {
+ int null_count = 0;
+
+ for (;;)
+ {
+ if (c == '\0')
+ null_count++;
+ else
+ CPP_PUTC (pfile, c);
+ c = PEEKC ();
+ if (c == EOF || !is_hspace(c))
+ break;
+ FORWARD(1);
+ }
+ if (null_count)
+ null_warning (pfile, null_count);
+ return CPP_HSPACE;
+ }
case '\r':
if (CPP_BUFFER (pfile)->has_escapes)
More information about the Gcc-patches
mailing list