[PATCH] get source line for diagnostic from preprocessed file / PR preprocessor/79106

Bader, Lucas lucas.bader@sap.com
Mon Dec 16 11:32:00 GMT 2019


Hello,

within a compile cluster, only the preprocessed output of GCC is transferred to remote nodes for compilation. 
When GCC produces advanced diagnostics (with -fdiagnostics-show-caret), e.g. prints out the affected source
line and fixit hints, it attempts to read the source file again, even when compiling a preprocessed file (-fpreprocessed). 
This leads to wrong diagnostics when building with a compile cluster, or, more generally, when changing or deleting the original source file.

This patch attempts to alter the behavior by implementing a location_get_source_line_preprocessed 
function that can be used in diagnostic-show-locus.c in case a preprocessed file is compiled.
There was some previous discussion on this behavior on PR preprocessor/79106.

This is my first patch to GCC, so in case something is wrong with the format, please let me know.

Best regards
Lucas

2019-12-16  Lucas Bader  <lucas.bader@sap.com>

	PR preprocessor/79106
	* c-opts.c (c_common_handle_option): pass -fpreprocessed 
	option value to global diagnostic configuration
	
	* diagnostic-show-locus.c (layout::layout): read line from source or preprocessed
	file based on -fpreprocessed value
	(source_line::source_line): read line from source or preprocessed
	file based on -fpreprocessed value
	(layout::print_line): read line from source or preprocessed
	file based on -fpreprocessed value
	
	* diagnostic.h (diagnostic_context): new members for reading
	source lines from preprocessed files
	* diagnostic.c (diagnostic_initialize): initialize new members
	
	* input.c (location_get_source_line_preprocessed): new function
	to read source lines from preprocessed files
	(test_reading_source_line_preprocessed): new test case
	(input_c_tests): execute new test case
	
	* opts-global.c (read_cmdline_options): pass input filename to global
	diagnostic context

---

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index c913291c07c..8634de6eb3f 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -485,6 +485,7 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
 
     case OPT_fpreprocessed:
       cpp_opts->preprocessed = value;
+      global_dc->is_preprocessed = value;
       break;
 
     case OPT_fdebug_cpp:
diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index cb920f6b9d0..3a4838605de 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -896,7 +896,12 @@ layout::layout (diagnostic_context * context,
      Center the primary caret to fit in max_width; all columns
      will be adjusted accordingly.  */
   size_t max_width = m_context->caret_max_width;
-  char_span line = location_get_source_line (m_exploc.file, m_exploc.line);
+  char_span line (NULL, 0);
+  if (global_dc->main_input_file_path != NULL && global_dc->is_preprocessed)
+    line = location_get_source_line_preprocessed (
+	m_exploc.file, global_dc->main_input_file_path, m_exploc.line);
+  else
+    line = location_get_source_line (m_exploc.file, m_exploc.line);
   if (line && (size_t)m_exploc.column <= line.length ())
     {
       size_t right_margin = CARET_LINE_MARGIN;
@@ -1913,7 +1918,12 @@ public:
 
 source_line::source_line (const char *filename, int line)
 {
-  char_span span = location_get_source_line (filename, line);
+  char_span span (NULL, 0);
+  if (global_dc->main_input_file_path != NULL && global_dc->is_preprocessed)
+    span = location_get_source_line_preprocessed (
+	filename, global_dc->main_input_file_path, line);
+  else
+    span = location_get_source_line (filename, line);
   chars = span.get_buffer ();
   width = span.length ();
 }
@@ -2237,7 +2247,13 @@ layout::show_ruler (int max_column) const
 void
 layout::print_line (linenum_type row)
 {
-  char_span line = location_get_source_line (m_exploc.file, row);
+  char_span line (NULL, 0);
+  if (global_dc->main_input_file_path != NULL && global_dc->is_preprocessed)
+    line = location_get_source_line_preprocessed (
+	m_exploc.file, global_dc->main_input_file_path, row);
+  else
+    line = location_get_source_line (m_exploc.file, row);
+
   if (!line)
     return;
 
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index a29bcf155e2..6ef99d0e8f1 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -218,6 +218,8 @@ diagnostic_initialize (diagnostic_context *context, int n_opts)
   context->begin_group_cb = NULL;
   context->end_group_cb = NULL;
   context->final_cb = default_diagnostic_final_cb;
+  context->main_input_file_path = NULL;
+  context->is_preprocessed = false;
 }
 
 /* Maybe initialize the color support. We require clients to do this
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 91e4c509605..f233057fbbe 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -158,6 +158,12 @@ struct diagnostic_context
   /* Maximum number of errors to report.  */
   int max_errors;
 
+  /* Original input file.  */
+  const char* main_input_file_path;
+
+  /* True if the input file is treated as preprocessed.  */
+  bool is_preprocessed;
+
   /* This function is called before any message is printed out.  It is
      responsible for preparing message prefix and such.  For example, it
      might say:
diff --git a/gcc/input.c b/gcc/input.c
index 00301ef68dd..6fe0dbfbf34 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -767,6 +767,109 @@ location_get_source_line (const char *file_path, int line)
   return char_span (buffer, len);
 }
 
+/* Return the physical source line that corresponds to SOURCE_NAME/LINE.
+   Read the line from the preprocessed file at FILE_PATH.
+   The line is not nul-terminated.  The returned pointer is only
+   valid until the next call of location_get_source_line.
+   Note that the line can contain several null characters,
+   so the returned value's length has the actual length of the line.
+   If the function fails, a NULL char_span is returned.  */
+
+char_span
+location_get_source_line_preprocessed (const char *source_name,
+				       const char *file_path, int line)
+{
+  char *buffer = NULL;
+  ssize_t len;
+
+  if (line == 0)
+    return char_span (NULL, 0);
+
+  fcache *c = lookup_or_add_file_to_cache_tab (file_path);
+  if (c == NULL)
+    return char_span (NULL, 0);
+
+  // find linemarker closest to actual line number
+  int linemarker_line = 0; // line no of linemarker in pp file
+  int linemarker_loc = 0;  // source line no indicated in linemarker
+  int current_line = 1;
+  size_t source_name_len = strlen (source_name);
+  while (read_line_num (c, current_line, &buffer, &len))
+    {
+      if (len > 2 && buffer[0] == '#' && ISDIGIT (buffer[2]))
+	{
+	  // is a linemarker
+	  bool is_match = false;
+	  for (ssize_t i = 3; i < len && source_name_len < len - i - 1; ++i)
+	    {
+	      if (buffer[i] == '"')
+		{
+		  is_match
+		      = memcmp (buffer + i + 1, source_name, source_name_len)
+			== 0;
+		  break;
+		}
+	    }
+	  bool match_past_line = false;
+	  if (is_match)
+	    {
+	      // we found a linemarker that matches the source file
+	      // e.g.
+	      // # 14 "../../file.h"
+	      // we use the last suitable marker we find
+	      int loc = atoi (buffer + 2);
+	      if (loc <= line && line - loc <= line - linemarker_loc)
+		{
+		  // new best candidate
+		  linemarker_loc = loc;
+		  linemarker_line = current_line;
+		  current_line++;
+		  continue;
+		}
+	      else if (loc > line)
+		{
+		  // don't break directly due to validity check
+		  match_past_line = true;
+		}
+	    }
+	  if (linemarker_line != 0
+	      && linemarker_line + (line - linemarker_loc) + 1 >= current_line)
+	    {
+	      // if we find any other linemarker, we have to check if our
+	      // potential source line is outside the current candidate if
+	      // that's the case, we invalidate the candidate
+	      linemarker_line = 0;
+	      linemarker_loc = 0;
+	    }
+	  if (match_past_line)
+	    break;
+	}
+      current_line++;
+    }
+  if (linemarker_line != 0)
+    {
+      // line of linemarker in the preprocessed file is the base at which we
+      // start counting the line indicated by the linemarker, e.g. the 5 in # 5
+      // "file.cpp", is the original source line number which starts right
+      // below the linemarker So to read line 7 of the original source file
+      // "file.cpp" we treat line 2055 as line 5 of the original line 7 = 2054
+      // + (7 - 5) + 1 = 2057
+      // ...
+      // 2054: # 5 "file.cpp"
+      // 2055: int func() {
+      // 2056:	   int a = 4;
+      // 2057:	   int b = 5;
+      // ...
+      bool read = read_line_num (
+	  c, linemarker_line + (line - linemarker_loc) + 1, &buffer, &len);
+      if (!read)
+	return char_span (NULL, 0);
+
+      return char_span (buffer, len);
+    }
+  return char_span (NULL, 0);
+}
+
 /* Determine if FILE_PATH missing a trailing newline on its final line.
    Only valid to call once all of the file has been loaded, by
    requesting a line number beyond the end of the file.  */
@@ -1951,6 +2054,70 @@ test_reading_source_line ()
   ASSERT_TRUE (source_line.get_buffer () == NULL);
 }
 
+/* Verify reading of preprocessed input files
+   (e.g. for caret-based diagnostics).  */
+
+static void
+test_reading_source_line_preprocessed ()
+{
+  /* Create a tempfile and write some valid preprocessor output.  */
+  temp_source_file tmp (SELFTEST_LOCATION, ".cpp.ii",
+			"# 1 \"test.cpp\"\n"
+			"# 1 \"test.cpp\"\n"
+			"# 1 \"test.h\" 1\n"
+			"void test_func() {\n"
+			"# 35 \"test.h\"\n"
+			"    do_something_else ();\n"
+			"}\n"
+			"# 2 \"test.cpp\" 2\n"
+			"\n"
+			"int main() {\n"
+			"    do_something ();\n"
+			"\n"
+			"    int i = 5;\n"
+			"    unsigned j = 3;\n"
+			"    if (i > j)\n"
+			"    return 0;\n"
+			"\n"
+			"    test_func ();\n"
+			"}");
+
+  /* Read back a specific line from the tempfile.  */
+  char_span source_line = location_get_source_line_preprocessed (
+      "test.cpp", tmp.get_filename (), 4);
+  ASSERT_TRUE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () != NULL);
+  ASSERT_EQ (20, source_line.length ());
+  ASSERT_TRUE (!strncmp ("    do_something ();", source_line.get_buffer (),
+			 source_line.length ()));
+
+  source_line = location_get_source_line_preprocessed (
+      "test.h", tmp.get_filename (), 35);
+  ASSERT_TRUE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () != NULL);
+  ASSERT_EQ (25, source_line.length ());
+  ASSERT_TRUE (!strncmp ("    do_something_else ();",
+			 source_line.get_buffer (), source_line.length ()));
+
+  // file not present in preprocessor output
+  source_line = location_get_source_line_preprocessed ("other.h",
+						       tmp.get_filename (), 4);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+
+  // off by one
+  source_line = location_get_source_line_preprocessed (
+      "test.cpp", tmp.get_filename (), 13);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+
+  // empty lines omitted by preprocessor
+  source_line = location_get_source_line_preprocessed ("test.h",
+						       tmp.get_filename (), 2);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+}
+
 /* Tests of lexing.  */
 
 /* Verify that token TOK from PARSER has cpp_token_as_text
@@ -3629,6 +3796,7 @@ input_c_tests ()
   for_each_line_table_case (test_lexer_char_constants);
 
   test_reading_source_line ();
+  test_reading_source_line_preprocessed ();
 
   test_line_offset_overflow ();
 }
diff --git a/gcc/input.h b/gcc/input.h
index c459bf28553..99ab002f941 100644
--- a/gcc/input.h
+++ b/gcc/input.h
@@ -83,6 +83,9 @@ class char_span
 };
 
 extern char_span location_get_source_line (const char *file_path, int line);
+extern char_span
+location_get_source_line_preprocessed (const char *source_name,
+				       const char *file_path, int line);
 
 extern bool location_missing_trailing_newline (const char *file_path);
 extern expanded_location
diff --git a/gcc/opts-global.c b/gcc/opts-global.c
index b51c2fbc21c..b48fe527370 100644
--- a/gcc/opts-global.c
+++ b/gcc/opts-global.c
@@ -224,6 +224,10 @@ read_cmdline_options (struct gcc_options *opts, struct gcc_options *opts_set,
 	  if (opts->x_main_input_filename == NULL)
 	    {
 	      opts->x_main_input_filename = decoded_options[i].arg;
+	  // reserve original input filename in diagnostic context
+	  // because if a preprocessed file is compiled, this is
+	  // changed to the original source file name later
+	  dc->main_input_file_path = opts->x_main_input_filename;
 	      opts->x_main_input_baselength
 		= base_of_path (opts->x_main_input_filename,
 				&opts->x_main_input_basename);
-- 
2.12.3



More information about the Gcc-patches mailing list