[gcc r15-1540] diagnostics: fixes to SARIF output [PR109360]

David Malcolm dmalcolm@gcc.gnu.org
Fri Jun 21 12:52:55 GMT 2024


https://gcc.gnu.org/g:9f4fdc3acebcf6b045edea1361570658da4bc0ab

commit r15-1540-g9f4fdc3acebcf6b045edea1361570658da4bc0ab
Author: David Malcolm <dmalcolm@redhat.com>
Date:   Fri Jun 21 08:46:13 2024 -0400

    diagnostics: fixes to SARIF output [PR109360]
    
    When adding validation of .sarif files against the schema
    (PR testsuite/109360) I discovered various issues where we were
    generating invalid .sarif files.
    
    Specifically, in
      c-c++-common/diagnostic-format-sarif-file-bad-utf8-pr109098-1.c
    the relatedLocations for the "note" diagnostics were missing column
    numbers, leading to validation failure due to non-unique elements,
    such as multiple:
            "message": {"text": "invalid UTF-8 character <bf>"}},
    on line 25 with no column information.
    
    Root cause is that for some diagnostics in libcpp we have a location_t
    representing the line as a whole, setting a column_override on the
    rich_location (since the line hasn't been fully read yet).  We were
    handling this column override for plain text output, but not for .sarif
    output.
    
    Similarly, in diagnostic-format-sarif-file-pr111700.c there is a warning
    emitted on "line 0" of the file, whereas SARIF requires line numbers to
    be positive.
    
    We also use column == 0 internally to mean "the line as a whole",
    whereas SARIF required column numbers to be positive.
    
    This patch fixes these various issues.
    
    gcc/ChangeLog:
            PR testsuite/109360
            * diagnostic-format-sarif.cc
            (sarif_builder::make_location_object): Pass any column override
            from rich_loc to maybe_make_physical_location_object.
            (sarif_builder::maybe_make_physical_location_object): Add
            "column_override" param and pass it to maybe_make_region_object.
            (sarif_builder::maybe_make_region_object): Add "column_override"
            param and use it when the location has 0 for a column.  Don't
            add "startLine", "startColumn", "endLine", or "endColumn" if
            the values aren't positive.
            (sarif_builder::maybe_make_region_object_for_context): Don't
            add "startLine" or "endLine" if the values aren't positive.
    
    libcpp/ChangeLog:
            PR testsuite/109360
            * include/rich-location.h (rich_location::get_column_override):
            New accessor.
    
    Signed-off-by: David Malcolm <dmalcolm@redhat.com>

Diff:
---
 gcc/diagnostic-format-sarif.cc | 75 ++++++++++++++++++++++++++++++------------
 libcpp/include/rich-location.h |  2 ++
 2 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 5f438dd38a8..5581aa1579e 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -243,11 +243,13 @@ private:
   json::array *maybe_make_kinds_array (diagnostic_event::meaning m) const;
   json::object *
   maybe_make_physical_location_object (location_t loc,
-				       enum diagnostic_artifact_role role);
+				       enum diagnostic_artifact_role role,
+				       int column_override);
   json::object *make_artifact_location_object (location_t loc);
   json::object *make_artifact_location_object (const char *filename);
   json::object *make_artifact_location_object_for_pwd () const;
-  json::object *maybe_make_region_object (location_t loc) const;
+  json::object *maybe_make_region_object (location_t loc,
+					  int column_override) const;
   json::object *maybe_make_region_object_for_context (location_t loc) const;
   json::object *make_region_object_for_hint (const fixit_hint &hint) const;
   json::object *make_multiformat_message_string (const char *msg) const;
@@ -924,8 +926,9 @@ sarif_builder::make_location_object (const rich_location &rich_loc,
   location_t loc = rich_loc.get_loc ();
 
   /* "physicalLocation" property (SARIF v2.1.0 section 3.28.3).  */
-  if (json::object *phs_loc_obj = maybe_make_physical_location_object (loc,
-								       role))
+  if (json::object *phs_loc_obj
+	= maybe_make_physical_location_object (loc, role,
+					       rich_loc.get_column_override ()))
     location_obj->set ("physicalLocation", phs_loc_obj);
 
   /* "logicalLocations" property (SARIF v2.1.0 section 3.28.4).  */
@@ -946,7 +949,7 @@ sarif_builder::make_location_object (const diagnostic_event &event,
   /* "physicalLocation" property (SARIF v2.1.0 section 3.28.3).  */
   location_t loc = event.get_location ();
   if (json::object *phs_loc_obj
-	= maybe_make_physical_location_object (loc, role))
+	= maybe_make_physical_location_object (loc, role, 0))
     location_obj->set ("physicalLocation", phs_loc_obj);
 
   /* "logicalLocations" property (SARIF v2.1.0 section 3.28.4).  */
@@ -961,7 +964,10 @@ sarif_builder::make_location_object (const diagnostic_event &event,
   return location_obj;
 }
 
-/* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for LOC,
+/* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for LOC.
+
+   If COLUMN_OVERRIDE is non-zero, then use it as the column number
+   if LOC has no column information.
 
    Ensure that we have an artifact object for the file, adding ROLE to it,
    and flagging that we will attempt to embed the contents of the artifact
@@ -970,7 +976,8 @@ sarif_builder::make_location_object (const diagnostic_event &event,
 json::object *
 sarif_builder::
 maybe_make_physical_location_object (location_t loc,
-				     enum diagnostic_artifact_role role)
+				     enum diagnostic_artifact_role role,
+				     int column_override)
 {
   if (loc <= BUILTINS_LOCATION || LOCATION_FILE (loc) == NULL)
     return NULL;
@@ -983,7 +990,8 @@ maybe_make_physical_location_object (location_t loc,
   get_or_create_artifact (LOCATION_FILE (loc), role, true);
 
   /* "region" property (SARIF v2.1.0 section 3.29.4).  */
-  if (json::object *region_obj = maybe_make_region_object (loc))
+  if (json::object *region_obj = maybe_make_region_object (loc,
+							   column_override))
     phys_loc_obj->set ("region", region_obj);
 
   /* "contextRegion" property (SARIF v2.1.0 section 3.29.5).  */
@@ -1089,10 +1097,14 @@ sarif_builder::get_sarif_column (expanded_location exploc) const
 }
 
 /* Make a region object (SARIF v2.1.0 section 3.30) for LOC,
-   or return NULL.  */
+   or return NULL.
+
+   If COLUMN_OVERRIDE is non-zero, then use it as the column number
+   if LOC has no column information.  */
 
 json::object *
-sarif_builder::maybe_make_region_object (location_t loc) const
+sarif_builder::maybe_make_region_object (location_t loc,
+					 int column_override) const
 {
   location_t caret_loc = get_pure_location (loc);
 
@@ -1114,21 +1126,40 @@ sarif_builder::maybe_make_region_object (location_t loc) const
   json::object *region_obj = new json::object ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  region_obj->set_integer ("startLine", exploc_start.line);
+  if (exploc_start.line > 0)
+    region_obj->set_integer ("startLine", exploc_start.line);
 
-  /* "startColumn" property (SARIF v2.1.0 section 3.30.6) */
-  region_obj->set_integer ("startColumn", get_sarif_column (exploc_start));
+  /* "startColumn" property (SARIF v2.1.0 section 3.30.6).
+
+     We use column == 0 to mean the whole line, so omit the column
+     information for this case, unless COLUMN_OVERRIDE is non-zero,
+     (for handling certain awkward lexer diagnostics)  */
+
+  if (exploc_start.column == 0 && column_override)
+    /* Use the provided column number.  */
+    exploc_start.column = column_override;
+
+  if (exploc_start.column > 0)
+    {
+      int start_column = get_sarif_column (exploc_start);
+      region_obj->set_integer ("startColumn", start_column);
+    }
 
   /* "endLine" property (SARIF v2.1.0 section 3.30.7) */
-  if (exploc_finish.line != exploc_start.line)
+  if (exploc_finish.line != exploc_start.line
+      && exploc_finish.line > 0)
     region_obj->set_integer ("endLine", exploc_finish.line);
 
   /* "endColumn" property (SARIF v2.1.0 section 3.30.8).
-     This expresses the column immediately beyond the range.  */
-  {
-    int next_column = sarif_builder::get_sarif_column (exploc_finish) + 1;
-    region_obj->set_integer ("endColumn", next_column);
-  }
+     This expresses the column immediately beyond the range.
+
+     We use column == 0 to mean the whole line, so omit the column
+     information for this case.  */
+  if (exploc_finish.column > 0)
+    {
+      int next_column = get_sarif_column (exploc_finish) + 1;
+      region_obj->set_integer ("endColumn", next_column);
+    }
 
   return region_obj;
 }
@@ -1164,10 +1195,12 @@ sarif_builder::maybe_make_region_object_for_context (location_t loc) const
   json::object *region_obj = new json::object ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  region_obj->set_integer ("startLine", exploc_start.line);
+  if (exploc_start.line > 0)
+    region_obj->set_integer ("startLine", exploc_start.line);
 
   /* "endLine" property (SARIF v2.1.0 section 3.30.7) */
-  if (exploc_finish.line != exploc_start.line)
+  if (exploc_finish.line != exploc_start.line
+      && exploc_finish.line > 0)
     region_obj->set_integer ("endLine", exploc_finish.line);
 
   /* "snippet" property (SARIF v2.1.0 section 3.30.13).  */
diff --git a/libcpp/include/rich-location.h b/libcpp/include/rich-location.h
index be424cb4b65..cc4f888f458 100644
--- a/libcpp/include/rich-location.h
+++ b/libcpp/include/rich-location.h
@@ -514,6 +514,8 @@ class rich_location
 
   const line_maps *get_line_table () const { return m_line_table; }
 
+  int get_column_override () const { return m_column_override; }
+
 private:
   bool reject_impossible_fixit (location_t where);
   void stop_supporting_fixits ();


More information about the Gcc-cvs mailing list