[PATCH] Remove some restrictions from rust-demangle.

Eduard-Mihai Burtescu eddyb@lyken.rs
Wed Sep 25 17:27:00 GMT 2019


The main change here is in the treatment of $...$ escapes.
I've relaxed the handling of unknown escapes during
unescaping: instead of failing, the demangler now continues
processing the input string, leaving the remainder of the
current path segment as-is. Relatedly, the rust_is_mangled
function doesn't check escapes at all anymore (as unknown
escapes aren't errors now).

E.g. "a$LT$b$X$c$GT$::d$C$e" would now be demangled to
"a<b$X$c$GT$::d,e" ($X$ not being a valid escape),
instead of being treated as an invalid Rust symbol.

This behavior matches the official Rust demangler, the
intention being that this more gracefully handles new
escapes being added to the legacy mangling format.

The other change is allowing the 16-digit hash at the end
of the Rust symbol to contain all 16 distinct hex nibbles.
Previously the maximum number of distinct nibbles was 15,
as a permutation of all 16 hex nibbles was considered
highly unlikely, but I stumbled across this example in my
large (1M) data set:
"_ZN4core3ptr18real_drop_in_place17h8abe3105492df6c7E"

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Also, I have no commit access, so I'd be thankful if
someone would commit this for me if/once approved.

2019-09-25  Eduard-Mihai Burtescu  <eddyb@lyken.rs>
libiberty/ChangeLog:
        * rust-demangle.c (looks_like_rust): Remove.
        (rust_is_mangled): Don't check escapes.
        (is_prefixed_hash): Allow 0-9a-f permutations.
        (rust_demangle_sym): Don't bail on unknown escapes.
        * testsuite/rust-demangle-expected: Update 'main::$99$' test.

diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c
index da591902db1..6b62e6dbd80 100644
--- a/libiberty/rust-demangle.c
+++ b/libiberty/rust-demangle.c
@@ -85,7 +85,6 @@ static const size_t hash_prefix_len = 3;
 static const size_t hash_len = 16;
 
 static int is_prefixed_hash (const char *start);
-static int looks_like_rust (const char *sym, size_t len);
 static int parse_lower_hex_nibble (char nibble);
 static char parse_legacy_escape (const char **in);
 
@@ -105,16 +104,13 @@ static char parse_legacy_escape (const char **in);
       negative (the rare Rust symbol is not demangled) so this sets
       the balance in favor of false negatives.
 
-   3. There must be no characters other than a-zA-Z0-9 and _.:$
-
-   4. There must be no unrecognized $-sign sequences.
-
-   5. There must be no sequence of three or more dots in a row ("...").  */
+   3. There must be no characters other than a-zA-Z0-9 and _.:$  */
 
 int
 rust_is_mangled (const char *sym)
 {
   size_t len, len_without_hash;
+  const char *end;
 
   if (!sym)
     return 0;
@@ -128,12 +124,22 @@ rust_is_mangled (const char *sym)
   if (!is_prefixed_hash (sym + len_without_hash))
     return 0;
 
-  return looks_like_rust (sym, len_without_hash);
+  end = sym + len_without_hash;
+
+  while (sym < end)
+    {
+      if (*sym == '$' || *sym == '.' || *sym == '_' || *sym == ':'
+          || ISALNUM (*sym))
+        sym++;
+      else
+        return 0;
+    }
+
+  return 1;
 }
 
 /* A hash is the prefix "::h" followed by 16 lowercase hex digits. The
-   hex digits must comprise between 5 and 15 (inclusive) distinct
-   digits.  */
+   hex digits must contain at least 5 distinct digits.  */
 
 static int
 is_prefixed_hash (const char *str)
@@ -162,28 +168,7 @@ is_prefixed_hash (const char *str)
     if (seen[i])
       count++;
 
-  return count >= 5 && count <= 15;
-}
-
-static int
-looks_like_rust (const char *str, size_t len)
-{
-  const char *end = str + len;
-
-  while (str < end)
-    {
-      if (*str == '$')
-        {
-          if (!parse_legacy_escape (&str))
-            return 0;
-        }
-      else if (*str == '.' || *str == '_' || *str == ':' || ISALNUM (*str))
-        str++;
-      else
-        return 0;
-    }
-
-  return 1;
+  return count >= 5;
 }
 
 /*
@@ -215,8 +200,9 @@ rust_demangle_sym (char *sym)
           if (unescaped)
             *out++ = unescaped;
           else
-            /* unexpected escape sequence, not looks_like_rust. */
-            goto fail;
+            /* unexpected escape sequence, skip the rest of this segment. */
+            while (in < end && *in != ':')
+              *out++ = *in++;
         }
       else if (*in == '_')
         {
@@ -248,14 +234,14 @@ rust_demangle_sym (char *sym)
       else if (*in == ':' || ISALNUM (*in))
         *out++ = *in++;
       else
-        /* unexpected character in symbol, not looks_like_rust.  */
-        goto fail;
+        {
+          /* unexpected character in symbol, not rust_is_mangled.  */
+          *out++ = '?'; /* This is pretty lame, but it's hard to do better. */
+          *out = '\0';
+          return;
+        }
     }
-  goto done;
 
-fail:
-  *out++ = '?'; /* This is pretty lame, but it's hard to do better. */
-done:
   *out = '\0';
 }
 
diff --git a/libiberty/testsuite/rust-demangle-expected b/libiberty/testsuite/rust-demangle-expected
index c3b03f9f02d..74774794736 100644
--- a/libiberty/testsuite/rust-demangle-expected
+++ b/libiberty/testsuite/rust-demangle-expected
@@ -41,7 +41,7 @@ main::main::he714a2e23ed7db2g
 # $XX$ substitutions should not contain just numbers.
 --format=auto
 _ZN4main4$99$17he714a2e23ed7db23E
-main::$99$::he714a2e23ed7db23
+main::$99$
 # _ at start of path should be removed.
 # ".." translates to "::" "$GT$" to ">" and "$LT$" to "<".
 --format=rust



More information about the Gcc-patches mailing list