This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix for backslash interpretation in #line and #-markers (1/2)


This is the first half of a fix for backslash interpretation in #line
directives and #-markers.  We were processing these with inconsistent
and buggy semantics, which made it impossible for downstream code to
process textual preprocessor output under some conditions (rare on
Unix, but normal on Windows).

The new rules are:  #line treats its optional filename argument the
same way that #include does, i.e. backslash is a normal text
character.  In #-markers, however, the string constant is a real
string constant; backslash introduces an escape sequence.  The
standalone preprocessor is changed to escape all dangerous characters
in its output.  So, given this input 

int x(void) { 
#line 3 "^A^B^C^D^E^F"  /* where ^A etc are hard control characters */
  int i = 0;
#line 5 "c:\newcode\file.c"
  i++;
# 7 "\""
  return i;
}

you get this preprocessor output

# 1 "test.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "test.c"
int x(void) {
# 3 "\001\002\003\004\005\006"
  int i = 0;
# 5 "c:\\newcode\\file.c"
  i++;
# 7 "\""
  return i;
}

and that can be fed back into the preprocessor safely.  Formerly, bad
things would appear in the output, such as 

# 7 """

Assembly output is still incorrect.  I get, for instance,

        .file 3 "c:\newcode\file.c"

which the assembler interprets as a filename containing a newline and
a form feed (it reads the \n and \f as escape sequences).  That will be
fixed by the second half of the patch.

Will bootstrap and test, with some new test cases, once I work up the
second half.

zw

	* cpphash.h (struct lexer_state): Remove line_extension.
	* cpplib.c (dequote_string, do_linemarker): New functions.
	(#line entry in directive table): Mark as INCL, not IN_I.
	(linemarker_dir): New.
	(_cpp_handle_directive): Forward linemarkers to do_linemarker
	via linemarker_dir.  Issue pedwarn about this extension here.
	(end_directive, directive_diagnostics, _cpp_handle_directive):
	Don't touch pfile->state.line_extension.
	(do_line): No longer handles linemarkers.

	* cpplib.h (cpp_quote_string): Prototype.
	* cppmacro.c (quote_string): Rename to cpp_quote_string and
	export.  All callers changed.
	* cppmain.c (print_line): Call cpp_quote_string on filenames.

	* doc/cpp.texi: Document new semantics of #line and linemarkers.

===================================================================
Index: cpphash.h
--- cpphash.h	2002/02/02 18:56:34	1.140
+++ cpphash.h	2002/02/15 03:52:13
@@ -152,9 +152,6 @@ struct lexer_state
 
   /* Nonzero when parsing arguments to a function-like macro.  */
   unsigned char parsing_args;
-
-  /* Nonzero when in a # NUMBER directive.  */
-  unsigned char line_extension;
 };
 
 /* Special nodes - identifiers with predefined significance.  */
===================================================================
Index: cpplib.c
--- cpplib.c	2002/01/03 21:43:04	1.289
+++ cpplib.c	2002/02/15 03:52:13
@@ -103,6 +103,8 @@ static const cpp_token *parse_include PA
 static void push_conditional	PARAMS ((cpp_reader *, int, int,
 					 const cpp_hashnode *));
 static unsigned int read_flag	PARAMS ((cpp_reader *, unsigned int));
+static U_CHAR *dequote_string	PARAMS ((cpp_reader *, const U_CHAR *,
+					 unsigned int));
 static int  strtoul_for_line	PARAMS ((const U_CHAR *, unsigned int,
 					 unsigned long *));
 static void do_diagnostic	PARAMS ((cpp_reader *, enum error_type, int));
@@ -117,6 +119,7 @@ static void do_pragma_once	PARAMS ((cpp_
 static void do_pragma_poison	PARAMS ((cpp_reader *));
 static void do_pragma_system_header	PARAMS ((cpp_reader *));
 static void do_pragma_dependency	PARAMS ((cpp_reader *));
+static void do_linemarker		PARAMS ((cpp_reader *));
 static const cpp_token *get_token_no_padding PARAMS ((cpp_reader *));
 static const cpp_token *get__Pragma_string PARAMS ((cpp_reader *));
 static void destringize_and_run PARAMS ((cpp_reader *, const cpp_string *));
@@ -145,7 +148,7 @@ D(if,		T_IF,		KANDR,     COND | IF_COND)
 D(else,		T_ELSE,		KANDR,     COND)	   /*   9863 */ \
 D(ifndef,	T_IFNDEF,	KANDR,     COND | IF_COND) /*   9675 */ \
 D(undef,	T_UNDEF,	KANDR,     IN_I)	   /*   4837 */ \
-D(line,		T_LINE,		KANDR,     IN_I)	   /*   2465 */ \
+D(line,		T_LINE,		KANDR,     INCL)	   /*   2465 */ \
 D(elif,		T_ELIF,		STDC89,    COND)	   /*    610 */ \
 D(error,	T_ERROR,	STDC89,    0)		   /*    475 */ \
 D(pragma,	T_PRAGMA,	STDC89,    IN_I)	   /*    195 */ \
@@ -167,10 +170,6 @@ SCCS_ENTRY						   /* 0 SVR4? */
 /* Use the table to generate a series of prototypes, an enum for the
    directive names, and an array of directive handlers.  */
 
-/* The directive-processing functions are declared to return int
-   instead of void, because some old compilers have trouble with
-   pointers to functions returning void.  */
-
 /* Don't invoke CONCAT2 with any whitespace or K&R cc will fail.  */
 #define D(name, t, o, f) static void CONCAT2(do_,name) PARAMS ((cpp_reader *));
 DIRECTIVE_TABLE
@@ -195,6 +194,14 @@ DIRECTIVE_TABLE
 #undef D
 #undef DIRECTIVE_TABLE
 
+/* Directive entry used to dispatch linemarkers (# NUMBER "file") to
+   do_linemarker.  The KANDR origin is more or less true - the original
+   K+R cpp did use this notation in its preprocessed output.  */
+static const directive linemarker_dir =
+{
+  do_linemarker, U"#", 1, KANDR, IN_I
+};
+
 #define SEEN_EOL() (pfile->cur_token[-1].type == CPP_EOF)
 
 /* Skip any remaining tokens in a directive.  */
@@ -256,7 +263,6 @@ end_directive (pfile, skip_line)
   pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
   pfile->state.in_directive = 0;
   pfile->state.angled_headers = 0;
-  pfile->state.line_extension = 0;
   pfile->directive = 0;
 }
 
@@ -268,40 +274,31 @@ directive_diagnostics (pfile, dir, inden
      const directive *dir;
      int indented;
 {
-  if (pfile->state.line_extension)
-    {
-      if (CPP_PEDANTIC (pfile)
-	  && ! pfile->state.skipping)
-	cpp_pedwarn (pfile, "style of line directive is a GCC extension");
+  /* Issue -pedantic warnings for extensions.  */
+  if (CPP_PEDANTIC (pfile)
+      && ! pfile->state.skipping
+      && dir->origin == EXTENSION)
+    cpp_pedwarn (pfile, "#%s is a GCC extension", dir->name);
+
+  /* Traditionally, a directive is ignored unless its # is in
+     column 1.  Therefore in code intended to work with K+R
+     compilers, directives added by C89 must have their #
+     indented, and directives present in traditional C must not.
+     This is true even of directives in skipped conditional
+     blocks.  */
+  if (CPP_WTRADITIONAL (pfile))
+    {
+      if (dir == &dtable[T_ELIF])
+	cpp_warning (pfile, "suggest not using #elif in traditional C");
+      else if (indented && dir->origin == KANDR)
+	cpp_warning (pfile,
+		     "traditional C ignores #%s with the # indented",
+		     dir->name);
+      else if (!indented && dir->origin != KANDR)
+	cpp_warning (pfile,
+		     "suggest hiding #%s from traditional C with an indented #",
+		     dir->name);
     }
-  else
-    {
-      /* Issue -pedantic warnings for extensions.  */
-      if (CPP_PEDANTIC (pfile)
-	  && ! pfile->state.skipping
-	  && dir->origin == EXTENSION)
-	cpp_pedwarn (pfile, "#%s is a GCC extension", dir->name);
-
-      /* Traditionally, a directive is ignored unless its # is in
-	 column 1.  Therefore in code intended to work with K+R
-	 compilers, directives added by C89 must have their #
-	 indented, and directives present in traditional C must not.
-	 This is true even of directives in skipped conditional
-	 blocks.  */
-      if (CPP_WTRADITIONAL (pfile))
-	{
-	  if (dir == &dtable[T_ELIF])
-	    cpp_warning (pfile, "suggest not using #elif in traditional C");
-	  else if (indented && dir->origin == KANDR)
-	    cpp_warning (pfile,
-			 "traditional C ignores #%s with the # indented",
-			 dir->name);
-	  else if (!indented && dir->origin != KANDR)
-	    cpp_warning (pfile,
-		 "suggest hiding #%s from traditional C with an indented #",
-			 dir->name);
-	}
-    }
 }
 
 /* Check if we have a known directive.  INDENTED is non-zero if the
@@ -330,8 +327,9 @@ _cpp_handle_directive (pfile, indented)
      assembler code.  */
   else if (dname->type == CPP_NUMBER && CPP_OPTION (pfile, lang) != CLK_ASM)
     {
-      dir = &dtable[T_LINE];
-      pfile->state.line_extension = 1;
+      dir = &linemarker_dir;
+      if (CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
+	cpp_pedwarn (pfile, "style of line directive is a GCC extension");
     }
 
   if (dir)
@@ -653,9 +651,10 @@ do_include_next (pfile)
   do_include_common (pfile, IT_INCLUDE_NEXT);
 }
 
-/* Subroutine of do_line.  Read possible flags after file name.  LAST
-   is the last flag seen; 0 if this is the first flag. Return the flag
-   if it is valid, 0 at the end of the directive. Otherwise complain.  */
+/* Subroutine of do_linemarker.  Read possible flags after file name.
+   LAST is the last flag seen; 0 if this is the first flag. Return the
+   flag if it is valid, 0 at the end of the directive. Otherwise
+   complain.  */
 static unsigned int
 read_flag (pfile, last)
      cpp_reader *pfile;
@@ -679,9 +678,44 @@ read_flag (pfile, last)
   return 0;
 }
 
-/* Another subroutine of do_line.  Convert a number in STR, of length
-   LEN, to binary; store it in NUMP, and return 0 if the number was
-   well-formed, 1 if not.  Temporary, hopefully.  */
+/* Subroutine of do_linemarker.  Returns a version of STR which has a
+   NUL terminator and all escape sequences converted to their
+   equivalents.  Temporary, hopefully.  */
+static U_CHAR *
+dequote_string (pfile, str, len)
+     cpp_reader *pfile;
+     const U_CHAR *str;
+     unsigned int len;
+{
+  U_CHAR *result = _cpp_unaligned_alloc (pfile, len + 1);
+  U_CHAR *dst = result;
+  const U_CHAR *limit = str + len;
+  unsigned int c;
+  unsigned HOST_WIDE_INT mask;
+
+  /* We need the mask to match the host's 'unsigned char', not the
+     target's.  */
+  if (CHAR_BIT < HOST_BITS_PER_WIDE_INT)
+    mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1;
+  else
+    mask = ~(unsigned HOST_WIDE_INT)0;
+  
+  while (str < limit)
+    {
+      c = *str++;
+      if (c != '\\')
+	*dst++ = c;
+      else
+	*dst++ = cpp_parse_escape (pfile,  (const U_CHAR **)&str,
+				   limit, mask, 0);
+    }
+  *dst++ = '\0';
+  return result;
+}
+
+/* Subroutine of do_line and do_linemarker.  Convert a number in STR,
+   of length LEN, to binary; store it in NUMP, and return 0 if the
+   number was well-formed, 1 if not.  Temporary, hopefully.  */
 static int
 strtoul_for_line (str, len, nump)
      const U_CHAR *str;
@@ -712,16 +746,9 @@ do_line (pfile)
   const cpp_token *token;
   const char *new_file = pfile->map->to_file;
   unsigned long new_lineno;
-  unsigned int cap, new_sysp = pfile->map->sysp;
-  enum lc_reason reason = LC_RENAME;
 
   /* C99 raised the minimum limit on #line numbers.  */
-  cap = CPP_OPTION (pfile, c99) ? 2147483647 : 32767;
-
-  /* Putting this in _cpp_handle_directive risks two calls to
-     _cpp_backup_tokens in some circumstances, which can segfault.  */
-  if (pfile->state.line_extension)
-    _cpp_backup_tokens (pfile, 1);
+  unsigned int cap = CPP_OPTION (pfile, c99) ? 2147483647 : 32767;
 
   /* #line commands expand macros.  */
   token = cpp_get_token (pfile);
@@ -734,42 +761,84 @@ do_line (pfile)
       return;
     }      
 
-  if (CPP_PEDANTIC (pfile) && ! pfile->state.line_extension
-      && (new_lineno == 0 || new_lineno > cap))
+  if (CPP_PEDANTIC (pfile) && (new_lineno == 0 || new_lineno > cap))
     cpp_pedwarn (pfile, "line number out of range");
 
   token = cpp_get_token (pfile);
   if (token->type == CPP_STRING)
     {
       new_file = (const char *) token->val.str.text;
+      check_eol (pfile);
+    }
+  else if (token->type != CPP_EOF)
+    {
+      cpp_error (pfile, "\"%s\" is not a valid filename",
+		 cpp_token_as_text (pfile, token));
+      return;
+    }
 
-      /* Only accept flags for the # 55 form.  */
-      if (pfile->state.line_extension)
-	{
-	  int flag;
+  skip_rest_of_line (pfile);
+  _cpp_do_file_change (pfile, LC_RENAME, new_file, new_lineno,
+		       pfile->map->sysp);
+}
+
+/* Interpret the # 44 "file" [flags] notation, which has slightly
+   different syntax and semantics from #line:  Flags are allowed,
+   we never complain about the line number being too big, and "file"
+   _is_ a string constant (escapes are interpreted).  */
+static void
+do_linemarker (pfile)
+     cpp_reader *pfile;
+{
+  const cpp_token *token;
+  const char *new_file = pfile->map->to_file;
+  unsigned long new_lineno;
+  unsigned int new_sysp = pfile->map->sysp;
+  enum lc_reason reason = LC_RENAME;
+  int flag;
+
+  /* Putting this in _cpp_handle_directive risks two calls to
+     _cpp_backup_tokens in some circumstances, which can segfault.  */
+  _cpp_backup_tokens (pfile, 1);
+
+  /* Linemarkers expand macros, just as #line commands do.  */
+  token = cpp_get_token (pfile);
+  if (token->type != CPP_NUMBER
+      || strtoul_for_line (token->val.str.text, token->val.str.len,
+			   &new_lineno))
+    {
+      cpp_error (pfile, "\"%s\" after # is not a positive integer",
+		 cpp_token_as_text (pfile, token));
+      return;
+    }      
 
-	  new_sysp = 0;
-	  flag = read_flag (pfile, 0);
-	  if (flag == 1)
-	    {
-	      reason = LC_ENTER;
-	      /* Fake an include for cpp_included ().  */
-	      _cpp_fake_include (pfile, new_file);
-	      flag = read_flag (pfile, flag);
-	    }
-	  else if (flag == 2)
-	    {
-	      reason = LC_LEAVE;
-	      flag = read_flag (pfile, flag);
-	    }
-	  if (flag == 3)
-	    {
-	      new_sysp = 1;
-	      flag = read_flag (pfile, flag);
-	      if (flag == 4)
-		new_sysp = 2;
-	    }
+  token = cpp_get_token (pfile);
+  if (token->type == CPP_STRING)
+    {
+      new_file = (const char *) dequote_string (pfile, token->val.str.text,
+						token->val.str.len);
+      new_sysp = 0;
+      flag = read_flag (pfile, 0);
+      if (flag == 1)
+	{
+	  reason = LC_ENTER;
+	  /* Fake an include for cpp_included ().  */
+	  _cpp_fake_include (pfile, new_file);
+	  flag = read_flag (pfile, flag);
+	}
+      else if (flag == 2)
+	{
+	  reason = LC_LEAVE;
+	  flag = read_flag (pfile, flag);
+	}
+      if (flag == 3)
+	{
+	  new_sysp = 1;
+	  flag = read_flag (pfile, flag);
+	  if (flag == 4)
+	    new_sysp = 2;
 	}
+
       check_eol (pfile);
     }
   else if (token->type != CPP_EOF)
===================================================================
Index: cpplib.h
--- cpplib.h	2002/02/02 18:56:34	1.201
+++ cpplib.h	2002/02/15 03:52:14
@@ -592,6 +592,9 @@ extern void cpp_forall_identifiers	PARAM
 /* In cppmacro.c */
 extern void cpp_scan_nooutput		PARAMS ((cpp_reader *));
 extern int  cpp_sys_macro_p		PARAMS ((cpp_reader *));
+extern unsigned char *cpp_quote_string	PARAMS ((unsigned char *,
+						 const unsigned char *,
+						 unsigned int));
 
 /* In cppfiles.c */
 extern int cpp_included	PARAMS ((cpp_reader *, const char *));
===================================================================
Index: cppmacro.c
--- cppmacro.c	2002/01/03 21:43:05	1.91
+++ cppmacro.c	2002/02/15 03:52:14
@@ -64,9 +64,6 @@ static cpp_context *next_context PARAMS 
 static const cpp_token *padding_token
   PARAMS ((cpp_reader *, const cpp_token *));
 static void expand_arg PARAMS ((cpp_reader *, macro_arg *));
-static unsigned char *quote_string PARAMS ((unsigned char *,
-					    const unsigned char *,
-					    unsigned int));
 static const cpp_token *new_string_token PARAMS ((cpp_reader *, U_CHAR *,
 						  unsigned int));
 static const cpp_token *new_number_token PARAMS ((cpp_reader *, unsigned int));
@@ -163,7 +160,7 @@ builtin_macro (pfile, node)
 	name = map->to_file;
 	len = strlen (name);
 	buf = _cpp_unaligned_alloc (pfile, len * 4 + 1);
-	len = quote_string (buf, (const unsigned char *) name, len) - buf;
+	len = cpp_quote_string (buf, (const unsigned char *) name, len) - buf;
 
 	result = new_string_token (pfile, buf, len);
       }
@@ -243,9 +240,10 @@ builtin_macro (pfile, node)
 
 /* Copies SRC, of length LEN, to DEST, adding backslashes before all
    backslashes and double quotes.  Non-printable characters are
-   converted to octal.  DEST must be of sufficient size.  */
-static U_CHAR *
-quote_string (dest, src, len)
+   converted to octal.  DEST must be of sufficient size.  Returns
+   a pointer to the end of the string.  */
+U_CHAR *
+cpp_quote_string (dest, src, len)
      U_CHAR *dest;
      const U_CHAR *src;
      unsigned int len;
@@ -330,7 +328,7 @@ stringify_arg (pfile, arg)
 	  _cpp_buff *buff = _cpp_get_buff (pfile, len);
 	  unsigned char *buf = BUFF_FRONT (buff);
 	  len = cpp_spell_token (pfile, token, buf) - buf;
-	  dest = quote_string (dest, buf, len);
+	  dest = cpp_quote_string (dest, buf, len);
 	  _cpp_release_buff (pfile, buff);
 	}
       else
===================================================================
Index: cppmain.c
--- cppmain.c	2002/01/03 21:43:06	1.90
+++ cppmain.c	2002/02/15 03:52:14
@@ -316,8 +316,17 @@ print_line (map, line, special_flags)
   print.line = line;
   if (! options->no_line_commands)
     {
+      size_t to_file_len = strlen (map->to_file);
+      unsigned char *to_file_quoted = alloca (to_file_len * 4 + 1);
+      unsigned char *p;
+      
+      /* cpp_quote_string does not nul-terminate, so we have to do it
+	 ourselves.  */
+      p = cpp_quote_string (to_file_quoted,
+			    (unsigned char *)map->to_file, to_file_len);
+      *p = '\0';
       fprintf (print.outf, "# %u \"%s\"%s",
-	       SOURCE_LINE (map, print.line), map->to_file, special_flags);
+	       SOURCE_LINE (map, print.line), to_file_quoted, special_flags);
 
       if (map->sysp == 2)
 	fputs (" 3 4", print.outf);
===================================================================
Index: doc/cpp.texi
--- doc/cpp.texi	2002/01/10 11:53:19	1.22
+++ doc/cpp.texi	2002/02/15 03:52:16
@@ -3035,6 +3035,9 @@ input.  Subsequent lines are counted fro
 effect.  In addition, @var{filename} is a string constant.  The
 following line and all subsequent lines are reported to come from the
 file it specifies, until something else happens to change that.
+@var{filename} is interpreted according to the same rules as the
+argument of @samp{#include}: backslashes are ordinary text characters,
+not escape characters.
 
 @item #line @var{anything else}
 @var{anything else} is checked for macro calls, which are expanded.
@@ -3224,7 +3229,9 @@ of the form
 These are called @dfn{linemarkers}.  They are inserted as needed into
 the output (but never within a string or character constant).  They mean
 that the following line originated in file @var{filename} at line
-@var{linenum}.
+@var{linenum}.  Unlike @samp{#line}, @var{filename} here is a true string
+constant, in which backslashes introduce escape sequences.  Non-printing
+characters never appear in it literally; they are written as octal escapes.
 
 After the file name comes zero or more flags, which are @samp{1},
 @samp{2}, @samp{3}, or @samp{4}.  If there are multiple flags, spaces


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]