From: Zack Weinberg Date: Thu, 14 Mar 2002 18:17:18 +0000 (+0000) Subject: cpphash.h (struct lexer_state): Remove line_extension member. X-Git-Tag: releases/gcc-3.3.0~6448 X-Git-Url: https://gcc.gnu.org/git/?a=commitdiff_plain;h=dcc229e5a1a7425479e0738cb9e70e8e8a2132da;p=gcc.git cpphash.h (struct lexer_state): Remove line_extension member. * cpphash.h (struct lexer_state): Remove line_extension member. * cpplib.c (dequote_string, do_linemarker): New functions. (linemarker_dir): New data object. (DIRECTIVE_TABLE): No longer need to interpret #line in preprocessed source. Delete obsolete comment about return values of handlers. (end_directive, directive_diagnostics, _cpp_handle_directive): Don't muck with line_extension. (directive_diagnostics): No need to issue warnings for linemarkers here. (_cpp_handle_directive): Issue warnings for linemarkers here, when appropriate. Dispatch linemarkers to do_linemarker, not do_line. (do_line): Code to handle linemarkers split out to do_linemarker. Convert escape sequences in filename argument, both places. * cppmacro.c (quote_string): Rename cpp_quote_string and export. All callers changed. * cpplib.h (cpp_quote_string): Prototype. * cppmain.c (print_line): Call cpp_quote_string on to_file before printing it. * doc/cpp.texi: Document that escapes are now interpreted in #line and in linemarkers, and that non-printing characters are converted to octal escapes when linemarkers are generated. From-SVN: r50779 --- diff --git a/gcc/cpphash.h b/gcc/cpphash.h index e06914aeecd0..03de93f9644e 100644 --- a/gcc/cpphash.h +++ b/gcc/cpphash.h @@ -152,9 +152,6 @@ struct lexer_state /* Nonzero when parsing arguments to a function-like macro. */ unsigned char parsing_args; - - /* Nonzero when in a # NUMBER directive. */ - unsigned char line_extension; }; /* Special nodes - identifiers with predefined significance. 
*/ diff --git a/gcc/cpplib.c b/gcc/cpplib.c index b6c3805b2dc0..d8e34330e281 100644 --- a/gcc/cpplib.c +++ b/gcc/cpplib.c @@ -103,6 +103,8 @@ static const cpp_token *parse_include PARAMS ((cpp_reader *)); static void push_conditional PARAMS ((cpp_reader *, int, int, const cpp_hashnode *)); static unsigned int read_flag PARAMS ((cpp_reader *, unsigned int)); +static U_CHAR *dequote_string PARAMS ((cpp_reader *, const U_CHAR *, + unsigned int)); static int strtoul_for_line PARAMS ((const U_CHAR *, unsigned int, unsigned long *)); static void do_diagnostic PARAMS ((cpp_reader *, enum error_type, int)); @@ -117,6 +119,7 @@ static void do_pragma_once PARAMS ((cpp_reader *)); static void do_pragma_poison PARAMS ((cpp_reader *)); static void do_pragma_system_header PARAMS ((cpp_reader *)); static void do_pragma_dependency PARAMS ((cpp_reader *)); +static void do_linemarker PARAMS ((cpp_reader *)); static const cpp_token *get_token_no_padding PARAMS ((cpp_reader *)); static const cpp_token *get__Pragma_string PARAMS ((cpp_reader *)); static void destringize_and_run PARAMS ((cpp_reader *, const cpp_string *)); @@ -145,7 +148,7 @@ D(if, T_IF, KANDR, COND | IF_COND) /* 18162 */ \ D(else, T_ELSE, KANDR, COND) /* 9863 */ \ D(ifndef, T_IFNDEF, KANDR, COND | IF_COND) /* 9675 */ \ D(undef, T_UNDEF, KANDR, IN_I) /* 4837 */ \ -D(line, T_LINE, KANDR, IN_I) /* 2465 */ \ +D(line, T_LINE, KANDR, 0) /* 2465 */ \ D(elif, T_ELIF, STDC89, COND) /* 610 */ \ D(error, T_ERROR, STDC89, 0) /* 475 */ \ D(pragma, T_PRAGMA, STDC89, IN_I) /* 195 */ \ @@ -167,10 +170,6 @@ SCCS_ENTRY /* 0 SVR4? */ /* Use the table to generate a series of prototypes, an enum for the directive names, and an array of directive handlers. */ -/* The directive-processing functions are declared to return int - instead of void, because some old compilers have trouble with - pointers to functions returning void. */ - /* Don't invoke CONCAT2 with any whitespace or K&R cc will fail. 
*/ #define D(name, t, o, f) static void CONCAT2(do_,name) PARAMS ((cpp_reader *)); DIRECTIVE_TABLE @@ -195,6 +194,14 @@ DIRECTIVE_TABLE #undef D #undef DIRECTIVE_TABLE +/* Wrapper struct directive for linemarkers. + The origin is more or less true - the original K+R cpp + did use this notation in its preprocessed output. */ +static const directive linemarker_dir = +{ + do_linemarker, U"#", 1, KANDR, IN_I +}; + #define SEEN_EOL() (pfile->cur_token[-1].type == CPP_EOF) /* Skip any remaining tokens in a directive. */ @@ -256,7 +263,6 @@ end_directive (pfile, skip_line) pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments); pfile->state.in_directive = 0; pfile->state.angled_headers = 0; - pfile->state.line_extension = 0; pfile->directive = 0; } @@ -268,39 +274,30 @@ directive_diagnostics (pfile, dir, indented) const directive *dir; int indented; { - if (pfile->state.line_extension) + /* Issue -pedantic warnings for extensions. */ + if (CPP_PEDANTIC (pfile) + && ! pfile->state.skipping + && dir->origin == EXTENSION) + cpp_pedwarn (pfile, "#%s is a GCC extension", dir->name); + + /* Traditionally, a directive is ignored unless its # is in + column 1. Therefore in code intended to work with K+R + compilers, directives added by C89 must have their # + indented, and directives present in traditional C must not. + This is true even of directives in skipped conditional + blocks. #elif cannot be used at all. */ + if (CPP_WTRADITIONAL (pfile)) { - if (CPP_PEDANTIC (pfile) - && ! pfile->state.skipping) - cpp_pedwarn (pfile, "style of line directive is a GCC extension"); - } - else - { - /* Issue -pedantic warnings for extensions. */ - if (CPP_PEDANTIC (pfile) - && ! pfile->state.skipping - && dir->origin == EXTENSION) - cpp_pedwarn (pfile, "#%s is a GCC extension", dir->name); - - /* Traditionally, a directive is ignored unless its # is in - column 1. 
Therefore in code intended to work with K+R - compilers, directives added by C89 must have their # - indented, and directives present in traditional C must not. - This is true even of directives in skipped conditional - blocks. #elif cannot be used at all. */ - if (CPP_WTRADITIONAL (pfile)) - { - if (dir == &dtable[T_ELIF]) - cpp_warning (pfile, "suggest not using #elif in traditional C"); - else if (indented && dir->origin == KANDR) - cpp_warning (pfile, - "traditional C ignores #%s with the # indented", - dir->name); - else if (!indented && dir->origin != KANDR) - cpp_warning (pfile, - "suggest hiding #%s from traditional C with an indented #", - dir->name); - } + if (dir == &dtable[T_ELIF]) + cpp_warning (pfile, "suggest not using #elif in traditional C"); + else if (indented && dir->origin == KANDR) + cpp_warning (pfile, + "traditional C ignores #%s with the # indented", + dir->name); + else if (!indented && dir->origin != KANDR) + cpp_warning (pfile, + "suggest hiding #%s from traditional C with an indented #", + dir->name); } } @@ -339,8 +336,10 @@ _cpp_handle_directive (pfile, indented) assembler code. */ else if (dname->type == CPP_NUMBER && CPP_OPTION (pfile, lang) != CLK_ASM) { - dir = &dtable[T_LINE]; - pfile->state.line_extension = 1; + dir = &linemarker_dir; + if (CPP_PEDANTIC (pfile) && ! CPP_OPTION (pfile, preprocessed) + && ! pfile->state.skipping) + cpp_pedwarn (pfile, "style of line directive is a GCC extension"); } if (dir) @@ -669,9 +668,10 @@ do_include_next (pfile) do_include_common (pfile, IT_INCLUDE_NEXT); } -/* Subroutine of do_line. Read possible flags after file name. LAST - is the last flag seen; 0 if this is the first flag. Return the flag - if it is valid, 0 at the end of the directive. Otherwise complain. */ +/* Subroutine of do_linemarker. Read possible flags after file name. + LAST is the last flag seen; 0 if this is the first flag. Return the + flag if it is valid, 0 at the end of the directive. Otherwise + complain. 
*/ static unsigned int read_flag (pfile, last) cpp_reader *pfile; @@ -695,9 +695,43 @@ read_flag (pfile, last) return 0; } -/* Another subroutine of do_line. Convert a number in STR, of length - LEN, to binary; store it in NUMP, and return 0 if the number was - well-formed, 1 if not. Temporary, hopefully. */ +/* Subroutine of do_line and do_linemarker. Returns a version of STR + which has a NUL terminator and all escape sequences converted to + their equivalents. Temporary, hopefully. */ +static U_CHAR * +dequote_string (pfile, str, len) + cpp_reader *pfile; + const U_CHAR *str; + unsigned int len; +{ + U_CHAR *result = _cpp_unaligned_alloc (pfile, len + 1); + U_CHAR *dst = result; + const U_CHAR *limit = str + len; + unsigned int c; + unsigned HOST_WIDE_INT mask; + + /* We need the mask to match the host's 'unsigned char', not the + target's. */ + if (CHAR_BIT < HOST_BITS_PER_WIDE_INT) + mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1; + else + mask = ~(unsigned HOST_WIDE_INT)0; + + while (str < limit) + { + c = *str++; + if (c != '\\') + *dst++ = c; + else + *dst++ = cpp_parse_escape (pfile, (const U_CHAR **)&str, limit, mask); + } + *dst++ = '\0'; + return result; +} + +/* Subroutine of do_line and do_linemarker. Convert a number in STR, + of length LEN, to binary; store it in NUMP, and return 0 if the + number was well-formed, 1 if not. Temporary, hopefully. */ static int strtoul_for_line (str, len, nump) const U_CHAR *str; @@ -719,8 +753,8 @@ strtoul_for_line (str, len, nump) } /* Interpret #line command. - Note that the filename string (if any) is treated as if it were an - include filename. That means no escape handling. */ + Note that the filename string (if any) is a true string constant + (escapes are interpreted), unlike in #include. 
*/ static void do_line (pfile) cpp_reader *pfile; @@ -728,16 +762,9 @@ do_line (pfile) const cpp_token *token; const char *new_file = pfile->map->to_file; unsigned long new_lineno; - unsigned int cap, new_sysp = pfile->map->sysp; - enum lc_reason reason = LC_RENAME; /* C99 raised the minimum limit on #line numbers. */ - cap = CPP_OPTION (pfile, c99) ? 2147483647 : 32767; - - /* Putting this in _cpp_handle_directive risks two calls to - _cpp_backup_tokens in some circumstances, which can segfault. */ - if (pfile->state.line_extension) - _cpp_backup_tokens (pfile, 1); + unsigned int cap = CPP_OPTION (pfile, c99) ? 2147483647 : 32767; /* #line commands expand macros. */ token = cpp_get_token (pfile); @@ -750,42 +777,85 @@ do_line (pfile) return; } - if (CPP_PEDANTIC (pfile) && ! pfile->state.line_extension - && (new_lineno == 0 || new_lineno > cap)) + if (CPP_PEDANTIC (pfile) && (new_lineno == 0 || new_lineno > cap)) cpp_pedwarn (pfile, "line number out of range"); token = cpp_get_token (pfile); if (token->type == CPP_STRING) { - new_file = (const char *) token->val.str.text; + new_file = (const char *) dequote_string (pfile, token->val.str.text, + token->val.str.len); + check_eol (pfile); + } + else if (token->type != CPP_EOF) + { + cpp_error (pfile, "\"%s\" is not a valid filename", + cpp_token_as_text (pfile, token)); + return; + } - /* Only accept flags for the # 55 form. */ - if (pfile->state.line_extension) - { - int flag; + skip_rest_of_line (pfile); + _cpp_do_file_change (pfile, LC_RENAME, new_file, new_lineno, + pfile->map->sysp); +} - new_sysp = 0; - flag = read_flag (pfile, 0); - if (flag == 1) - { - reason = LC_ENTER; - /* Fake an include for cpp_included (). 
*/ - _cpp_fake_include (pfile, new_file); - flag = read_flag (pfile, flag); - } - else if (flag == 2) - { - reason = LC_LEAVE; - flag = read_flag (pfile, flag); - } - if (flag == 3) - { - new_sysp = 1; - flag = read_flag (pfile, flag); - if (flag == 4) - new_sysp = 2; - } +/* Interpret the # 44 "file" [flags] notation, which has slightly + different syntax and semantics from #line: Flags are allowed, + and we never complain about the line number being too big. */ +static void +do_linemarker (pfile) + cpp_reader *pfile; +{ + const cpp_token *token; + const char *new_file = pfile->map->to_file; + unsigned long new_lineno; + unsigned int new_sysp = pfile->map->sysp; + enum lc_reason reason = LC_RENAME; + int flag; + + /* Back up so we can get the number again. Putting this in + _cpp_handle_directive risks two calls to _cpp_backup_tokens in + some circumstances, which can segfault. */ + _cpp_backup_tokens (pfile, 1); + + /* #line commands expand macros. */ + token = cpp_get_token (pfile); + if (token->type != CPP_NUMBER + || strtoul_for_line (token->val.str.text, token->val.str.len, + &new_lineno)) + { + cpp_error (pfile, "\"%s\" after # is not a positive integer", + cpp_token_as_text (pfile, token)); + return; + } + + token = cpp_get_token (pfile); + if (token->type == CPP_STRING) + { + new_file = (const char *) dequote_string (pfile, token->val.str.text, + token->val.str.len); + new_sysp = 0; + flag = read_flag (pfile, 0); + if (flag == 1) + { + reason = LC_ENTER; + /* Fake an include for cpp_included (). 
*/ + _cpp_fake_include (pfile, new_file); + flag = read_flag (pfile, flag); } + else if (flag == 2) + { + reason = LC_LEAVE; + flag = read_flag (pfile, flag); + } + if (flag == 3) + { + new_sysp = 1; + flag = read_flag (pfile, flag); + if (flag == 4) + new_sysp = 2; + } + check_eol (pfile); } else if (token->type != CPP_EOF) diff --git a/gcc/cpplib.h b/gcc/cpplib.h index de5dd54bacfd..191649ccfd65 100644 --- a/gcc/cpplib.h +++ b/gcc/cpplib.h @@ -592,6 +592,9 @@ extern void cpp_forall_identifiers PARAMS ((cpp_reader *, /* In cppmacro.c */ extern void cpp_scan_nooutput PARAMS ((cpp_reader *)); extern int cpp_sys_macro_p PARAMS ((cpp_reader *)); +extern unsigned char *cpp_quote_string PARAMS ((unsigned char *, + const unsigned char *, + unsigned int)); /* In cppfiles.c */ extern int cpp_included PARAMS ((cpp_reader *, const char *)); diff --git a/gcc/cppmacro.c b/gcc/cppmacro.c index 302e0bd421b3..c5bb71e79547 100644 --- a/gcc/cppmacro.c +++ b/gcc/cppmacro.c @@ -64,9 +64,6 @@ static cpp_context *next_context PARAMS ((cpp_reader *)); static const cpp_token *padding_token PARAMS ((cpp_reader *, const cpp_token *)); static void expand_arg PARAMS ((cpp_reader *, macro_arg *)); -static unsigned char *quote_string PARAMS ((unsigned char *, - const unsigned char *, - unsigned int)); static const cpp_token *new_string_token PARAMS ((cpp_reader *, U_CHAR *, unsigned int)); static const cpp_token *new_number_token PARAMS ((cpp_reader *, unsigned int)); @@ -164,7 +161,7 @@ builtin_macro (pfile, node) name = map->to_file; len = strlen (name); buf = _cpp_unaligned_alloc (pfile, len * 4 + 1); - len = quote_string (buf, (const unsigned char *) name, len) - buf; + len = cpp_quote_string (buf, (const unsigned char *) name, len) - buf; result = new_string_token (pfile, buf, len); } @@ -244,9 +241,10 @@ builtin_macro (pfile, node) /* Copies SRC, of length LEN, to DEST, adding backslashes before all backslashes and double quotes. Non-printable characters are - converted to octal. 
DEST must be of sufficient size. */ -static U_CHAR * -quote_string (dest, src, len) + converted to octal. DEST must be of sufficient size. Returns + a pointer to the end of the string. */ +U_CHAR * +cpp_quote_string (dest, src, len) U_CHAR *dest; const U_CHAR *src; unsigned int len; @@ -331,7 +329,7 @@ stringify_arg (pfile, arg) _cpp_buff *buff = _cpp_get_buff (pfile, len); unsigned char *buf = BUFF_FRONT (buff); len = cpp_spell_token (pfile, token, buf) - buf; - dest = quote_string (dest, buf, len); + dest = cpp_quote_string (dest, buf, len); _cpp_release_buff (pfile, buff); } else diff --git a/gcc/cppmain.c b/gcc/cppmain.c index 503fa3089566..b8757c705d04 100644 --- a/gcc/cppmain.c +++ b/gcc/cppmain.c @@ -321,8 +321,17 @@ print_line (map, line, special_flags) print.line = line; if (! options->no_line_commands) { + size_t to_file_len = strlen (map->to_file); + unsigned char *to_file_quoted = alloca (to_file_len * 4 + 1); + unsigned char *p; + + /* cpp_quote_string does not nul-terminate, so we have to do it + ourselves. */ + p = cpp_quote_string (to_file_quoted, + (unsigned char *)map->to_file, to_file_len); + *p = '\0'; fprintf (print.outf, "# %u \"%s\"%s", - SOURCE_LINE (map, print.line), map->to_file, special_flags); + SOURCE_LINE (map, print.line), to_file_quoted, special_flags); if (map->sysp == 2) fputs (" 3 4", print.outf); diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi index 74da087f5c2a..3087e351fe31 100644 --- a/gcc/doc/cpp.texi +++ b/gcc/doc/cpp.texi @@ -3087,6 +3087,13 @@ input. Subsequent lines are counted from @var{linenum}. effect. In addition, @var{filename} is a string constant. The following line and all subsequent lines are reported to come from the file it specifies, until something else happens to change that. +@var{filename} is interpreted according to the normal rules for a string +constant: backslash escapes are interpreted. This is different from +@samp{#include}. 
+ +Previous versions of GNU CPP did not interpret escapes in @samp{#line}; +we have changed it because the standard requires they be interpreted, +and most other compilers do. @item #line @var{anything else} @var{anything else} is checked for macro calls, which are expanded. @@ -3304,7 +3311,8 @@ of the form These are called @dfn{linemarkers}. They are inserted as needed into the output (but never within a string or character constant). They mean that the following line originated in file @var{filename} at line -@var{linenum}. +@var{linenum}. @var{filename} will never contain any non-printing +characters; they are replaced with octal escape sequences. After the file name comes zero or more flags, which are @samp{1}, @samp{2}, @samp{3}, or @samp{4}. If there are multiple flags, spaces @@ -3868,6 +3876,17 @@ The @samp{#line} directive used to change GCC's notion of the a double-quoted header file name. In 3.0 and later, it does not. @xref{Line Control}, for further explanation. +@item Syntax of @samp{#line} + +In GCC 2.95 and previous, the string constant argument to @samp{#line} +was treated the same way as the argument to @samp{#include}: backslash +escapes were not honored, and the string ended at the second @samp{"}. +This is not compliant with the C standard. In GCC 3.0, an attempt was +made to correct the behavior, so that the string was treated as a real +string constant, but it turned out to be buggy. In 3.1, the bugs have +been fixed. (We are not fixing the bugs in 3.0 because they affect +relatively few people and the fix is quite invasive.) + @end itemize @node Invocation