This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH 17/22] libcpp: add location tracking within string literals
- From: David Malcolm <dmalcolm at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: David Malcolm <dmalcolm at redhat dot com>
- Date: Thu, 10 Sep 2015 16:28:28 -0400
- Subject: [PATCH 17/22] libcpp: add location tracking within string literals
- Authentication-results: sourceware.org; auth=none
- References: <1441916913-11547-1-git-send-email-dmalcolm at redhat dot com>
This has not been optimized yet.
gcc/c-family/ChangeLog:
* c-common.c (fname_as_string): Initialize loc field of "cstr",
and call init_raw on strname.loc.
* c-lex.c (cb_ident): Initialize loc field of "cstr".
libcpp/ChangeLog:
* charset.c (struct _cpp_strbuf): Add cpp_string_location field
"loc".
(conversion_loop): Add "loc_reader" param and, if non-NULL, call its
add_char_at method.
(convert_utf8_utf16): Add "loc_reader" param and pass it to
conversion_loop.
(convert_utf8_utf32): Likewise.
(convert_utf16_utf8): Likewise.
(convert_utf32_utf8): Likewise.
(convert_no_conversion): Add "loc_reader" param and, if non-NULL,
call its add_n_chars_at method.
(convert_using_iconv): Add dummy cpp_string_location_reader *
param.
(APPLY_CONVERSION): Add LOC_READER param.
(cpp_host_to_exec_charset): Call init on tbuf's loc. Add NULL when
calling APPLY_CONVERSION.
(_cpp_valid_ucn): Add "char_range" and "loc_reader" params. Write
back to "char_range".
(convert_ucn): Add "char_range" and "loc_reader" params, passing
them to _cpp_valid_ucn call and to APPLY_CONVERSION site.
(convert_hex): Add "char_range" and "loc_reader" params; use them
to track source range information.
(convert_oct): Likewise.
(convert_escape): Add loc_reader param and use it to track source
range information.
(cpp_interpret_string): Initialize tbuf.loc. Create an on-stack
cpp_string_location_reader and use it to track source range
information.
(cpp_interpret_charconst): Initialize str.loc.
(_cpp_convert_input): Initialize to.loc. Add NULL when calling
APPLY_CONVERSION.
(cpp_string_location::init): New method.
(cpp_string_location::init_raw): New method.
(cpp_string_location::add_char_at): New method.
(cpp_string_location::add_n_chars_at): New method.
(cpp_string_location::get_loc_at_index): New method.
(cpp_string_location::get_range_at_index): New method.
(cpp_string_location::trivial_p): New method.
(cpp_string_location_reader::cpp_string_location_reader): New ctors.
(cpp_string_location_reader::get_next): New method.
* directives.c (do_line): Initialize s.loc.
(do_linemarker): Likewise.
* expr.c (_cpp_parse_expr): Call init_raw on the token's str.loc.
* include/cpplib.h (struct cpp_string_fragment_location): New struct.
(struct cpp_string_location): New struct.
(class cpp_string_location_reader): New class.
(struct cpp_string): Add field "loc", a cpp_string_location.
* internal.h (convert_f): Add cpp_string_location_reader * param.
(_cpp_valid_ucn): Add source_range * and cpp_string_location_reader *
params.
* lex.c (forms_identifier_p): Add NULL arguments to _cpp_valid_ucn call.
(lex_number): Initialize number->loc.
(create_literal): Call init_raw on the token's str.loc.
* macro.c (new_string_token): Call init on the token's str.loc.
---
gcc/c-family/c-common.c | 3 +-
gcc/c-family/c-lex.c | 2 +-
libcpp/charset.c | 345 ++++++++++++++++++++++++++++++++++++++++++------
libcpp/directives.c | 4 +-
libcpp/expr.c | 2 +
libcpp/include/cpplib.h | 134 +++++++++++++++++++
libcpp/internal.h | 7 +-
libcpp/lex.c | 12 +-
libcpp/macro.c | 1 +
9 files changed, 465 insertions(+), 45 deletions(-)
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 77962fc..a430bee 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -935,7 +935,7 @@ fname_as_string (int pretty_p)
const char *name = "top level";
char *namep;
int vrb = 2, len;
- cpp_string cstr = { 0, 0 }, strname;
+ cpp_string cstr = { 0, 0, {NULL, 0, 0} }, strname;
if (!pretty_p)
{
@@ -952,6 +952,7 @@ fname_as_string (int pretty_p)
snprintf (namep, len, "\"%s\"", name);
strname.text = (unsigned char *) namep;
strname.len = len - 1;
+ strname.loc.init_raw (UNKNOWN_LOCATION, len, 1, line_table);
if (cpp_interpret_string (parse_in, &strname, 1, &cstr, CPP_STRING))
{
diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
index 1334994..f457199 100644
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -171,7 +171,7 @@ cb_ident (cpp_reader * ARG_UNUSED (pfile),
if (!flag_no_ident)
{
/* Convert escapes in the string. */
- cpp_string cstr = { 0, 0 };
+ cpp_string cstr = { 0, 0, { NULL, 0, 0 } };
if (cpp_interpret_string (pfile, str, 1, &cstr, CPP_STRING))
{
targetm.asm_out.output_ident ((const char *) cstr.text);
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 5a1c929..3ae7916 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -99,6 +99,7 @@ struct _cpp_strbuf
uchar *text;
size_t asize;
size_t len;
+ cpp_string_location loc;
};
/* This is enough to hold any string that fits on a single 80-column
@@ -453,7 +454,8 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
static inline bool
conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
uchar **, size_t *),
- iconv_t cd, const uchar *from, size_t flen, struct _cpp_strbuf *to)
+ iconv_t cd, const uchar *from, size_t flen, struct _cpp_strbuf *to,
+ cpp_string_location_reader *loc_reader)
{
const uchar *inbuf;
uchar *outbuf;
@@ -468,8 +470,13 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
for (;;)
{
do
- rval = one_conversion (cd, &inbuf, &inbytesleft,
- &outbuf, &outbytesleft);
+ {
+ rval = one_conversion (cd, &inbuf, &inbytesleft,
+ &outbuf, &outbytesleft);
+ if (loc_reader)
+ to->loc.add_char_at (loc_reader->get_next (),
+ loc_reader->get_line_maps ());
+ }
while (inbytesleft && !rval);
if (__builtin_expect (inbytesleft == 0, 1))
@@ -503,36 +510,37 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
/* These four use the custom conversion code above. */
static bool
convert_utf8_utf16 (iconv_t cd, const uchar *from, size_t flen,
- struct _cpp_strbuf *to)
+ struct _cpp_strbuf *to, cpp_string_location_reader *loc_reader)
{
- return conversion_loop (one_utf8_to_utf16, cd, from, flen, to);
+ return conversion_loop (one_utf8_to_utf16, cd, from, flen, to, loc_reader);
}
static bool
convert_utf8_utf32 (iconv_t cd, const uchar *from, size_t flen,
- struct _cpp_strbuf *to)
+ struct _cpp_strbuf *to, cpp_string_location_reader *loc_reader)
{
- return conversion_loop (one_utf8_to_utf32, cd, from, flen, to);
+ return conversion_loop (one_utf8_to_utf32, cd, from, flen, to, loc_reader);
}
static bool
convert_utf16_utf8 (iconv_t cd, const uchar *from, size_t flen,
- struct _cpp_strbuf *to)
+ struct _cpp_strbuf *to, cpp_string_location_reader *loc_reader)
{
- return conversion_loop (one_utf16_to_utf8, cd, from, flen, to);
+ return conversion_loop (one_utf16_to_utf8, cd, from, flen, to, loc_reader);
}
static bool
convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
- struct _cpp_strbuf *to)
+ struct _cpp_strbuf *to, cpp_string_location_reader *loc_reader)
{
- return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
+ return conversion_loop (one_utf32_to_utf8, cd, from, flen, to, loc_reader);
}
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
- const uchar *from, size_t flen, struct _cpp_strbuf *to)
+ const uchar *from, size_t flen, struct _cpp_strbuf *to,
+ cpp_string_location_reader *loc_reader)
{
if (to->len + flen > to->asize)
{
@@ -542,6 +550,7 @@ convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
}
memcpy (to->text + to->len, from, flen);
to->len += flen;
+ to->loc.add_n_chars_at (flen, loc_reader);
return true;
}
@@ -559,7 +568,8 @@ convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
static bool
convert_using_iconv (iconv_t cd, const uchar *from, size_t flen,
- struct _cpp_strbuf *to)
+ struct _cpp_strbuf *to,
+ cpp_string_location_reader */*loc_reader*/)
{
ICONV_CONST char *inbuf;
char *outbuf;
@@ -606,8 +616,8 @@ convert_using_iconv (iconv_t cd, const uchar *from, size_t flen,
/* Arrange for the above custom conversion logic to be used automatically
when conversion between a suitable pair of character sets is requested. */
-#define APPLY_CONVERSION(CONVERTER, FROM, FLEN, TO) \
- CONVERTER.func (CONVERTER.cd, FROM, FLEN, TO)
+#define APPLY_CONVERSION(CONVERTER, FROM, FLEN, TO, LOC_READER) \
+ CONVERTER.func (CONVERTER.cd, FROM, FLEN, TO, LOC_READER)
struct cpp_conversion
{
@@ -792,8 +802,9 @@ cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
tbuf.asize = 1;
tbuf.text = XNEWVEC (uchar, tbuf.asize);
tbuf.len = 0;
+ tbuf.loc.init ();
- if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
+ if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf, NULL))
{
cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
return 0;
@@ -985,7 +996,9 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
bool
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
const uchar *limit, int identifier_pos,
- struct normalize_state *nst, cppchar_t *cp)
+ struct normalize_state *nst, cppchar_t *cp,
+ source_range *char_range,
+ cpp_string_location_reader *loc_reader)
{
cppchar_t result, c;
unsigned int length;
@@ -1021,6 +1034,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
if (!ISXDIGIT (c))
break;
str++;
+ if (char_range)
+ char_range->m_finish = loc_reader->get_next ().m_finish;
result = (result << 4) + hex_value (c);
}
while (--length && str < limit);
@@ -1090,7 +1105,9 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
An advanced pointer is returned. Issues all relevant diagnostics. */
static const uchar *
convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, struct cset_converter cvt)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt,
+ source_range char_range,
+ cpp_string_location_reader *loc_reader)
{
cppchar_t ucn;
uchar buf[6];
@@ -1100,7 +1117,12 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
from++; /* Skip u/U. */
- _cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
+
+ /* The u/U is part of the spelling of this character. */
+ char_range.m_finish = loc_reader->get_next ().m_finish;
+
+ /* _cpp_valid_ucn returns a bool and hands the code point back through
+    its cppchar_t * argument, so its result must not be assigned to UCN:
+    doing so would replace the decoded code point with 0 or 1 just
+    before it is converted below.  */
+ _cpp_valid_ucn (pfile, &from, limit, 0, &nst,
+ &ucn, &char_range, loc_reader);
rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
if (rval)
@@ -1109,9 +1131,18 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
cpp_errno (pfile, CPP_DL_ERROR,
"converting UCN to source character set");
}
- else if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf))
- cpp_errno (pfile, CPP_DL_ERROR,
- "converting UCN to execution character set");
+ else
+ {
+ /* Set up a cpp_string_location_reader to supply a
+ location for the single character, covering all of
+ char_range. */
+ cpp_string_location_reader buf_loc_reader
+ (char_range.m_start, char_range.m_finish + 1 - char_range.m_start,
+ loc_reader->get_line_maps ());
+ if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf, &buf_loc_reader))
+ cpp_errno (pfile, CPP_DL_ERROR,
+ "converting UCN to execution character set");
+ }
return from;
}
@@ -1174,7 +1205,9 @@ emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
number. You can, e.g. generate surrogate pairs this way. */
static const uchar *
convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, struct cset_converter cvt)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt,
+ source_range char_range,
+ cpp_string_location_reader *loc_reader)
{
cppchar_t c, n = 0, overflow = 0;
int digits_found = 0;
@@ -1185,13 +1218,19 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
cpp_warning (pfile, CPP_W_TRADITIONAL,
"the meaning of '\\x' is different in traditional C");
- from++; /* Skip 'x'. */
+ /* Skip 'x'. */
+ from++;
+
+ /* The 'x' is part of the spelling of this character. */
+ char_range.m_finish = loc_reader->get_next ().m_finish;
+
while (from < limit)
{
c = *from;
if (! hex_p (c))
break;
from++;
+ char_range.m_finish = loc_reader->get_next ().m_finish;
overflow |= n ^ (n << 4 >> 4);
n = (n << 4) + hex_value (c);
digits_found = 1;
@@ -1213,6 +1252,9 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
emit_numeric_escape (pfile, n, tbuf, cvt);
+ tbuf->loc.add_char_at (char_range,
+ pfile->line_table);
+
return from;
}
@@ -1224,7 +1266,9 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
number. */
static const uchar *
convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, struct cset_converter cvt)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt,
+ source_range char_range,
+ cpp_string_location_reader *loc_reader)
{
size_t count = 0;
cppchar_t c, n = 0;
@@ -1238,6 +1282,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
if (c < '0' || c > '7')
break;
from++;
+ char_range.m_finish = loc_reader->get_next ().m_finish;
overflow |= n ^ (n << 3 >> 3);
n = (n << 3) + c - '0';
}
@@ -1251,6 +1296,9 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
emit_numeric_escape (pfile, n, tbuf, cvt);
+ tbuf->loc.add_char_at (char_range,
+ pfile->line_table);
+
return from;
}
@@ -1260,7 +1308,8 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
pointer. Handles all relevant diagnostics. */
static const uchar *
convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, struct cset_converter cvt)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt,
+ cpp_string_location_reader *loc_reader)
{
/* Values of \a \b \e \f \n \r \t \v respectively. */
#if HOST_CHARSET == HOST_CHARSET_ASCII
@@ -1273,20 +1322,26 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
uchar c;
+ /* Record the location of the backslash. */
+ source_range char_range = loc_reader->get_next ();
+
c = *from;
switch (c)
{
/* UCNs, hex escapes, and octal escapes are processed separately. */
case 'u': case 'U':
- return convert_ucn (pfile, from, limit, tbuf, cvt);
+ return convert_ucn (pfile, from, limit, tbuf, cvt,
+ char_range, loc_reader);
case 'x':
- return convert_hex (pfile, from, limit, tbuf, cvt);
+ return convert_hex (pfile, from, limit, tbuf, cvt,
+ char_range, loc_reader);
break;
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
- return convert_oct (pfile, from, limit, tbuf, cvt);
+ return convert_oct (pfile, from, limit, tbuf, cvt,
+ char_range, loc_reader);
/* Various letter escapes. Get the appropriate host-charset
value into C. */
@@ -1339,7 +1394,7 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
}
/* Now convert what we have to the execution character set. */
- if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
+ if (!APPLY_CONVERSION (cvt, &c, 1, tbuf, loc_reader))
cpp_errno (pfile, CPP_DL_ERROR,
"converting escape sequence to execution character set");
@@ -1388,14 +1443,21 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
tbuf.text = XNEWVEC (uchar, tbuf.asize);
tbuf.len = 0;
+ tbuf.loc.init ();
for (i = 0; i < count; i++)
{
+ cpp_string_location_reader loc_reader (&from[i].loc, pfile->line_table);
p = from[i].text;
if (*p == 'u')
{
- if (*++p == '8')
- p++;
+ p++;
+ loc_reader.get_next ();
+ if (*p == '8')
+ {
+ p++;
+ loc_reader.get_next ();
+ }
}
- else if (*p == 'L' || *p == 'U') p++;
+ else if (*p == 'L' || *p == 'U')
+ {
+ /* Step loc_reader past the prefix character together with P, as is
+    done for 'u' above; otherwise every location handed out for the
+    rest of this literal would be one column too early.  */
+ p++;
+ loc_reader.get_next ();
+ }
if (*p == 'R')
@@ -1414,13 +1476,16 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
/* Raw strings are all normal characters; these can be fed
directly to convert_cset. */
- if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
+ if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf, &loc_reader))
goto fail;
continue;
}
- p++; /* Skip leading quote. */
+ /* Skip leading quote. */
+ p++;
+ loc_reader.get_next ();
+
limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
for (;;)
@@ -1432,13 +1497,13 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
{
/* We have a run of normal characters; these can be fed
directly to convert_cset. */
- if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
+ if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf, &loc_reader))
goto fail;
}
if (p == limit)
break;
- p = convert_escape (pfile, p + 1, limit, &tbuf, cvt);
+ p = convert_escape (pfile, p + 1, limit, &tbuf, cvt, &loc_reader);
}
}
/* NUL-terminate the 'to' buffer and translate it to a cpp_string
@@ -1447,6 +1512,7 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
to->text = tbuf.text;
to->len = tbuf.len;
+ to->loc = tbuf.loc;
return true;
fail:
@@ -1611,7 +1677,7 @@ cppchar_t
cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
unsigned int *pchars_seen, int *unsignedp)
{
- cpp_string str = { 0, 0 };
+ cpp_string str = { 0, 0, {NULL, 0, 0} };
bool wide = (token->type != CPP_CHAR && token->type != CPP_UTF8CHAR);
int u8 = 2 * int(token->type == CPP_UTF8CHAR);
cppchar_t result;
@@ -1719,14 +1785,16 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
to.text = input;
to.asize = size;
to.len = len;
+ to.loc.init ();
}
else
{
to.asize = MAX (65536, len);
to.text = XNEWVEC (uchar, to.asize);
to.len = 0;
+ to.loc.init ();
- if (!APPLY_CONVERSION (input_cset, input, len, &to))
+ if (!APPLY_CONVERSION (input_cset, input, len, &to, NULL))
cpp_error (pfile, CPP_DL_ERROR,
"failure to convert %s to %s",
CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
@@ -1811,3 +1879,204 @@ _cpp_default_encoding (void)
return current_encoding;
}
+
+/* Implementation of class cpp_string_location and
+ class cpp_string_location_reader.
+ We put them in this source file in the hope that they can be
+ inlined into heavy users such as cpp_interpret_string without
+ requiring the compiler itself to be built with LTO. */
+
+/* Put this cpp_string_location into its empty state: no fragment array,
+   with zero fragments in use and zero allocated.  Nothing is freed.  */
+void
+cpp_string_location::init ()
+{
+  m_num_fraglocs = 0;
+  m_alloc_fraglocs = 0;
+  m_fragloc_array = NULL;
+}
+
+/* Make this cpp_string_location describe a single fragment of LEN
+   characters, each COLS_PER_CHAR columns wide, starting at the spelling
+   location that LOC resolves to.  The one-element fragment array comes
+   from LINE_TABLE's reallocator, or from xrealloc when that is NULL.
+   Whatever m_fragloc_array pointed to before is overwritten without
+   being freed.
+
+   NOTE(review): m_len and m_cols_per_char are 12-bit and 4-bit
+   bit-fields, so LEN above 4095 or COLS_PER_CHAR above 15 is truncated
+   on assignment.  */
+void
+cpp_string_location::init_raw (source_location loc, int len, int cols_per_char,
+                               line_maps *line_table)
+{
+  line_map_realloc reallocator = line_table->reallocator;
+  if (!reallocator)
+    reallocator = (line_map_realloc) xrealloc;
+
+  m_fragloc_array
+    = (cpp_string_fragment_location *)
+      reallocator (NULL, sizeof (cpp_string_fragment_location));
+
+  cpp_string_fragment_location *only_fragment = &m_fragloc_array[0];
+  only_fragment->m_len = len;
+
+  /* LOC might be a macro location.  Column-by-column arithmetic only
+     makes sense on ordinary maps, so use the corresponding spelling
+     location instead.  */
+  only_fragment->m_loc
+    = linemap_resolve_location (line_table, loc,
+                                LRK_SPELLING_LOCATION, NULL);
+  only_fragment->m_cols_per_char = cols_per_char;
+
+  m_num_fraglocs = 1;
+  m_alloc_fraglocs = 1;
+}
+
+
+/* Record that the next character appended to the string was spelled at
+   RANGE in the source.
+
+   If RANGE is exactly the slot following the current fragment (same
+   start and finish as its get_next_range ()), that fragment is extended
+   by one character.  Otherwise a new one-character fragment is started
+   at RANGE.m_start, its column width being the extent of RANGE; the
+   fragment array is created or doubled as required using LINE_TABLE's
+   reallocator, or xrealloc when that is NULL.
+
+   m_len is a narrow bit-field, so a fragment already holding the largest
+   length it can represent is never extended: incrementing it would wrap
+   to zero and lose every character recorded in that fragment.  A new
+   fragment is started instead.
+
+   NOTE(review): m_cols_per_char is only 4 bits wide, so a RANGE wider
+   than 15 columns (e.g. a hex escape with many digits) is still
+   truncated when stored; fixing that needs a wider field.  */
+void
+cpp_string_location::add_char_at (source_range range,
+                                  line_maps *line_table)
+{
+  if (m_fragloc_array)
+    {
+      cpp_string_fragment_location *current_fragment
+        = get_current_fragment ();
+      source_range next_range = current_fragment->get_next_range ();
+
+      /* Probe on a copy whether m_len can count one more character.
+         This follows the bit-field's declared width rather than
+         hard-coding it.  */
+      cpp_string_fragment_location grown = *current_fragment;
+      grown.m_len++;
+      bool has_room = grown.m_len > current_fragment->m_len;
+
+      if (has_room
+          && range.m_start == next_range.m_start
+          && range.m_finish == next_range.m_finish)
+        {
+          /* Simple run-on character: lengthen the current fragment.  */
+          current_fragment->m_len++;
+          return;
+        }
+    }
+  else
+    {
+      /* No array yet; the growth code below allocates the first slot.  */
+      m_num_fraglocs = 0;
+      m_alloc_fraglocs = 0;
+    }
+
+  /* Start a new fragment, growing the array first if it is full.  */
+  if (m_num_fraglocs + 1 > m_alloc_fraglocs)
+    {
+      m_alloc_fraglocs = m_alloc_fraglocs ? m_alloc_fraglocs * 2 : 1;
+      line_map_realloc reallocator = (line_table->reallocator
+                                      ? line_table->reallocator
+                                      : (line_map_realloc) xrealloc);
+      m_fragloc_array = (cpp_string_fragment_location *)reallocator
+        (m_fragloc_array,
+         sizeof (cpp_string_fragment_location) * m_alloc_fraglocs);
+    }
+  m_num_fraglocs++;
+
+  cpp_string_fragment_location *new_fragment = get_current_fragment ();
+  new_fragment->m_len = 1;
+  new_fragment->m_loc = range.m_start;
+  new_fragment->m_cols_per_char = range.m_finish + 1 - range.m_start;
+}
+
+/* Append FLEN characters, drawing the source range of each one in turn
+   from LOC_READER.  When LOC_READER is NULL nothing is recorded.  */
+void
+cpp_string_location::add_n_chars_at (int flen,
+                                     cpp_string_location_reader *loc_reader)
+{
+  if (!loc_reader)
+    return;
+
+  for (int remaining = flen; remaining != 0; remaining--)
+    add_char_at (loc_reader->get_next (),
+                 loc_reader->get_line_maps ());
+}
+
+/* Return the source location at which the character with index CHAR_IDX
+   starts, counting characters across the fragments in order.  Returns 0
+   when CHAR_IDX lies beyond the characters recorded.  */
+source_location
+cpp_string_location::get_loc_at_index (unsigned int char_idx) const
+{
+  /* get_range_at_index does the same walk over the fragments and yields
+     a range starting at 0 for an out-of-range index.  */
+  return get_range_at_index (char_idx).m_start;
+}
+
+/* Return the source range of the character with index CHAR_IDX, counting
+   characters across the fragments in order.  Returns a range whose start
+   and finish are both 0 when CHAR_IDX lies beyond the characters
+   recorded.  */
+source_range
+cpp_string_location::get_range_at_index (unsigned int char_idx) const
+{
+  /* Index still to be consumed, relative to the fragment under test.  */
+  unsigned int offset = char_idx;
+
+  unsigned int i = 0;
+  while (i < m_num_fraglocs)
+    {
+      cpp_string_fragment_location *candidate = &m_fragloc_array[i];
+      if (offset < candidate->m_len)
+        return candidate->get_char_range (offset);
+      offset -= candidate->m_len;
+      i++;
+    }
+
+  /* Error: accessing beyond the end of the array.  */
+  source_range beyond_end;
+  beyond_end.m_start = 0;
+  beyond_end.m_finish = 0;
+  return beyond_end;
+}
+
+/* Return true if this consists of exactly one fragment whose characters
+   are each one column wide, false otherwise.  */
+bool
+cpp_string_location::trivial_p () const
+{
+  return (m_num_fraglocs == 1
+          && m_fragloc_array[0].m_cols_per_char == 1);
+}
+
+/* Construct a reader that walks the locations recorded in STRLOC,
+   remembering LINE_TABLE for get_line_maps ().  Iteration begins at the
+   start of STRLOC's first fragment and advances by that fragment's
+   column width.  */
+
+cpp_string_location_reader::
+cpp_string_location_reader (const cpp_string_location *strloc,
+                            line_maps *line_table)
+{
+  /* As an optimization, STRLOC is required to consist of a single
+     fragment.  */
+  linemap_assert (strloc->m_num_fraglocs == 1);
+
+  const cpp_string_fragment_location &only_fragment
+    = strloc->m_fragloc_array[0];
+  m_loc = only_fragment.m_loc;
+  m_cols_per_char = only_fragment.m_cols_per_char;
+  m_line_table = line_table;
+}
+
+/* Construct a reader for an arbitrary buffer: iteration begins at the
+   spelling location that SRC_LOC resolves to and advances COLS_PER_CHAR
+   columns per character.  LINE_TABLE is remembered for
+   get_line_maps ().  */
+
+cpp_string_location_reader::
+cpp_string_location_reader (source_location src_loc,
+                            int cols_per_char,
+                            line_maps *line_table)
+{
+  m_cols_per_char = cols_per_char;
+  m_line_table = line_table;
+
+  /* SRC_LOC might be a macro location.  Column-by-column arithmetic only
+     makes sense on ordinary maps, so start from the corresponding
+     location in an ordinary map.  */
+  m_loc = linemap_resolve_location (line_table, src_loc,
+                                    LRK_SPELLING_LOCATION, NULL);
+}
+
+/* Return the range of the next character, which spans m_cols_per_char
+   columns from the current position, and advance the reader past it.  */
+source_range
+cpp_string_location_reader::get_next ()
+{
+  const source_location start = m_loc;
+  m_loc = start + m_cols_per_char;
+
+  source_range next;
+  next.m_start = start;
+  next.m_finish = start + m_cols_per_char - 1;
+  return next;
+}
diff --git a/libcpp/directives.c b/libcpp/directives.c
index 1e9bc3d..b783a7e 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -949,7 +949,7 @@ do_line (cpp_reader *pfile)
token = cpp_get_token (pfile);
if (token->type == CPP_STRING)
{
- cpp_string s = { 0, 0 };
+ cpp_string s = { 0, 0, { NULL, 0, 0 } };
if (cpp_interpret_string_notranslate (pfile, &token->val.str, 1,
&s, CPP_STRING))
new_file = (const char *)s.text;
@@ -1006,7 +1006,7 @@ do_linemarker (cpp_reader *pfile)
token = cpp_get_token (pfile);
if (token->type == CPP_STRING)
{
- cpp_string s = { 0, 0 };
+ cpp_string s = { 0, 0, { NULL, 0, 0 } };
if (cpp_interpret_string_notranslate (pfile, &token->val.str,
1, &s, CPP_STRING))
new_file = (const char *)s.text;
diff --git a/libcpp/expr.c b/libcpp/expr.c
index 3dc5c0b..f355646 100644
--- a/libcpp/expr.c
+++ b/libcpp/expr.c
@@ -1228,6 +1228,8 @@ _cpp_parse_expr (cpp_reader *pfile, bool is_if)
"missing binary operator before token \"%s\"",
cpp_token_as_text (pfile, op.token));
want_value = false;
+ ((cpp_token *)op.token)->val.str.loc.init_raw (op.loc, 1, 1, /* FIXME */
+ pfile->line_table);
top->value = eval_token (pfile, op.token, op.loc);
continue;
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 0b1a403..a5e5df5 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -173,10 +173,144 @@ enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC11,
CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX11, CLK_CXX11,
CLK_GNUCXX14, CLK_CXX14, CLK_GNUCXX1Z, CLK_CXX1Z, CLK_ASM};
+/* Location of a run of consecutive chars in a cpp_string.
+ Specifically, this stores a run of m_len characters, starting at m_loc,
+ with a consistent number of columns (m_cols_per_char) per char.
+ See the description below for cpp_string_location. */
+struct GTY(()) cpp_string_fragment_location {
+ /* Location of the first column of the first character in the run. */
+ source_location m_loc;
+ /* Number of characters in the run. A 12-bit field, so at most 4095. */
+ unsigned int m_len : 12;
+ /* Columns occupied by each character. A 4-bit field, so at most 15. */
+ unsigned int m_cols_per_char : 4;
+
+ /* Return the range of the character at index IDX within this run,
+ computed from m_loc and m_cols_per_char. IDX is not checked against
+ m_len. */
+ source_range get_char_range (int idx) const
+ {
+ source_range result;
+ result.m_start = m_loc + (idx * m_cols_per_char);
+ result.m_finish = result.m_start + m_cols_per_char - 1;
+ return result;
+ }
+ /* Return the range a character would have if it immediately followed
+ the last character of this run. */
+ source_range get_next_range () const
+ {
+ return get_char_range (m_len);
+ }
+ /* Return the range from the start of the first character to the end
+ of the last character of this run. */
+ source_range get_covered_range () const
+ {
+ source_range result;
+ result.m_start = m_loc;
+ result.m_finish = m_loc + (m_len * m_cols_per_char) - 1;
+ return result;
+ }
+ /* Declared here only; presumably a debugging dump labelled with MSG
+ (definition not shown in this patch). */
+ void debug (const char *msg) const;
+};
+
+class cpp_string_location_reader;
+
+/* Location of the individual chars in a cpp_string.
+ This is stored as a dynamically-allocated array of fragments.
+ For example, consider this call to printf:
+
+ printf ("foo \x25\151 bar" "baz",
+ "not an int");
+
+ The string constant for the first parameter is composed of
+ the concatenation of two string literals, with hexadecimal
+ encoding of a '%' and octal encoding of a 'i', giving a
+ resulting STRING_CST of:
+
+ "foo %i barbaz"
+
+ We want to efficiently record the range of locations in the
+ source file of each character so that we can emit warnings about
+ the type mismatch between format specifier "%i" and the non-int
+ second argument.
+
+ We record the locations as a series of fragments, where within
+ each fragment we have a contiguous run of input characters with
+ a consistent number of columns per character. In the example
+ above the fragments are:
+
+ printf ("foo \x25\151 bar" "baz",
+ .........^^^^....................: fragment 0: 4 chars at 1 col per char
+ .............^^^^^^^^............: fragment 1: 2 chars at 4 cols per char
+ .....................^^^^........: fragment 2: 4 chars at 1 col per char
+ .............................^^^.: fragment 3: 3 chars at 1 col per char
+
+ Note that the hex and octal chars both happen to be 4 cols per char
+ and are contiguous, hence both end up being in fragment 1, whereas the
+ "bar" and "baz" aren't contiguous and hence have to be in separate
+ fragments.
+
+ Note also that having a constant cols-per-char within each fragment
+ means that given an index into the fragment we can directly compute
+ the corresponding source_range. */
+
+struct GTY(()) cpp_string_location {
+
+ /* Reset to the empty state: no fragment array, no fragments. */
+ void init ();
+ /* Set up a single fragment of LEN chars at COLS_PER_CHAR columns each,
+ starting at the spelling location of LOC. */
+ void init_raw (source_location loc, int len, int cols_per_char,
+ line_maps *line_table);
+
+ /* Record one more char, spelled at RANGE: either extend the current
+ fragment or start a new one. */
+ void add_char_at (source_range range,
+ line_maps *line_table);
+ /* Record FLEN more chars, taking each range from LOC_READER; does
+ nothing if LOC_READER is NULL. */
+ void add_n_chars_at (int flen, cpp_string_location_reader *loc_reader);
+
+ /* Look up the start location / full range of the char at index IDX,
+ counting across fragments; both yield 0 for an out-of-range IDX. */
+ source_location get_loc_at_index (unsigned int idx) const;
+ source_range get_range_at_index (unsigned int idx) const;
+
+ /* Declared here only; presumably a debugging dump (definition not shown
+ in this patch). */
+ void debug () const;
+
+ /* True for exactly one fragment at one column per char. */
+ bool trivial_p () const;
+
+ private:
+ /* Return the last fragment in use. Only meaningful when
+ m_num_fraglocs is nonzero. */
+ cpp_string_fragment_location *get_current_fragment () const
+ {
+ return &m_fragloc_array[m_num_fraglocs - 1];
+ }
+
+ /* Fields.
+ Ideally we would make these fields private, but this isn't easily
+ doable since gengtype generates functions in gtype-desc.c that
+ access them. */
+ public:
+
+ /* We seemingly can't use vec<> from libcpp, so do it "by hand"
+ here. */
+ /* The fragments, in string order; NULL when there are none. */
+ cpp_string_fragment_location *m_fragloc_array;
+ /* Number of elements of m_fragloc_array in use. */
+ unsigned int m_num_fraglocs;
+ /* Number of elements allocated for m_fragloc_array. */
+ unsigned int m_alloc_fraglocs;
+};
+
+/* A class for iterating through the source-locations within a
+ string, either from a cpp_string_location, or a temporary buffer.
+ Each call to get_next yields the range of one character and moves on
+ by a fixed number of columns. */
+class cpp_string_location_reader {
+ public:
+ /* Constructor for iterating through the locations in
+ cpp_string_location.
+ As an optimization, we require that STRLOC must consist of a
+ single fragment. */
+ cpp_string_location_reader (const cpp_string_location *strloc,
+ line_maps *line_table);
+
+ /* Constructor for iterating through an arbitrary buffer. */
+ cpp_string_location_reader (source_location src_loc,
+ int cols_per_char,
+ line_maps *line_table);
+
+ /* Return the range of the next character and advance past it. */
+ source_range get_next ();
+
+ /* Return the line_maps passed to the constructor. */
+ line_maps *get_line_maps () const { return m_line_table; }
+
+ private:
+ /* Location of the next character to be returned by get_next. */
+ source_location m_loc;
+ /* Number of columns get_next advances by on each call. */
+ int m_cols_per_char;
+ /* Line table supplied at construction. */
+ line_maps *m_line_table;
+};
+
/* Payload of a NUMBER, STRING, CHAR or COMMENT token. */
struct GTY(()) cpp_string {
unsigned int len;
const unsigned char *text;
+ cpp_string_location loc;
};
/* Flags for the cpp_token structure. */
diff --git a/libcpp/internal.h b/libcpp/internal.h
index abd464f..5be45f3 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -42,7 +42,8 @@ struct op;
struct _cpp_strbuf;
typedef bool (*convert_f) (iconv_t, const unsigned char *, size_t,
- struct _cpp_strbuf *);
+ struct _cpp_strbuf *,
+ cpp_string_location_reader *loc_reader);
struct cset_converter
{
convert_f func;
@@ -747,7 +748,9 @@ struct normalize_state
extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
const unsigned char *, int,
struct normalize_state *state,
- cppchar_t *);
+ cppchar_t *,
+ source_range *char_range,
+ cpp_string_location_reader *loc_reader);
extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t,
diff --git a/libcpp/lex.c b/libcpp/lex.c
index a84a8c0..0a6bc1c 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1247,7 +1247,7 @@ forms_identifier_p (cpp_reader *pfile, int first,
cppchar_t s;
buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
- state, &s))
+ state, &s, NULL, NULL))
return true;
buffer->cur -= 2;
}
@@ -1407,6 +1407,15 @@ lex_number (cpp_reader *pfile, cpp_string *number,
const uchar *base;
uchar *dest;
+ /* FIXME: should it really use a new "cpp_number", rather than cpp_string?
+ We need to init this, or we get a crash accessing uninited data
+ during GC, since,
+ struct GTY(()) cpp_token
+ has union cpp_token_u with
+ desc ("cpp_token_val_index (&%1)")))
+ and this gives CPP_TOKEN_FLD_STR for numbers (and strings). */
+ number->loc.init ();
+
base = pfile->buffer->cur - 1;
do
{
@@ -1446,6 +1455,7 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
token->type = type;
token->val.str.len = len;
token->val.str.text = dest;
+ token->val.str.loc.init_raw (token->src_loc, len, 1, pfile->line_table);
}
/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
diff --git a/libcpp/macro.c b/libcpp/macro.c
index 786c21b..b21e218 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -216,6 +216,7 @@ new_string_token (cpp_reader *pfile, unsigned char *text, unsigned int len)
token->type = CPP_STRING;
token->val.str.len = len;
token->val.str.text = text;
+ token->val.str.loc.init ();
token->flags = 0;
return token;
}
--
1.8.5.3