This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 6/7] [D] libiberty: Improve support for demangling D2 templates


In my tests, this gives the demangler near-complete support.  Of a
sample of about 75k symbols pulled from the standard library
unittester, all but 20 were successfully parsed.

---
libiberty/ChangeLog:

2015-05-13 Iain Buclaw  <ibuclaw@gdcproject.org>

    * d-demangle.c (dlang_symbol_kinds): New enum.
    (dlang_parse_symbol): Update signature.  Handle an ambiguity between
    the mangled symbol for an extern(Pascal) function and a template value
    argument.  Only check for a type if parsing a function, or at the top
    level.  Return failure if the entire symbol was not successfully
    demangled.
    (dlang_identifier): Update signature.  Handle an ambiguity between two
    adjacent digits in a mangled symbol string.
    (dlang_type): Update call to dlang_parse_symbol.
    (dlang_template_args): Likewise.
    (dlang_parse_template): Likewise.
    (dlang_demangle): Likewise.
    * testsuite/d-demangle-expected: Fix bad tests found, and add problematic
    examples to the unittests.
From 32a49ea35f26964a7b4c8667835cd5c63b9baa43 Mon Sep 17 00:00:00 2001
From: Iain Buclaw <ibuclaw@gdcproject.org>
Date: Mon, 11 May 2015 10:08:31 +0200
Subject: [PATCH 6/7] D demangle: Better support for D template symbols

---
 libiberty/d-demangle.c                  | 191 ++++++++++++++++++++++++--------
 libiberty/testsuite/d-demangle-expected |  44 ++++++--
 2 files changed, 181 insertions(+), 54 deletions(-)

diff --git a/libiberty/d-demangle.c b/libiberty/d-demangle.c
index 0af926c..a7821d8 100644
--- a/libiberty/d-demangle.c
+++ b/libiberty/d-demangle.c
@@ -165,6 +165,21 @@ string_prepend (string *p, const char *s)
     }
 }
 
+/* What kinds of symbol we could be parsing.  */
+enum dlang_symbol_kinds
+{
+  /* Top-level symbol, needs its type checked.  */
+  dlang_top_level,
+  /* Function symbol, needs its type checked.  */
+  dlang_function,
+  /* Strongly typed name, such as for classes, structs and enums.  */
+  dlang_type_name,
+  /* Template identifier.  */
+  dlang_template_ident,
+  /* Template symbol parameter.  */
+  dlang_template_param
+};
+
 /* Prototypes for forward referenced functions */
 static const char *dlang_function_args (string *, const char *);
 
@@ -172,7 +187,8 @@ static const char *dlang_type (string *, const char *);
 
 static const char *dlang_value (string *, const char *, const char *, char);
 
-static const char *dlang_parse_symbol (string *, const char *);
+static const char *dlang_parse_symbol (string *, const char *,
+				       enum dlang_symbol_kinds);
 
 static const char *dlang_parse_tuple (string *, const char *);
 
@@ -527,7 +543,7 @@ dlang_type (string *decl, const char *mangled)
     case 'E': /* enum T */
     case 'T': /* typedef T */
       mangled++;
-      return dlang_parse_symbol (decl, mangled);
+      return dlang_parse_symbol (decl, mangled, dlang_type_name);
     case 'D': /* delegate T */
     {
       string mods;
@@ -662,114 +678,168 @@ dlang_type (string *decl, const char *mangled)
 /* Extract the identifier from MANGLED and append it to DECL.
    Return the remaining string on success or NULL on failure.  */
 static const char *
-dlang_identifier (string *decl, const char *mangled)
+dlang_identifier (string *decl, const char *mangled,
+		  enum dlang_symbol_kinds kind)
 {
+  char *endptr;
+  long len;
+
   if (mangled == NULL || *mangled == '\0')
     return NULL;
 
-  if (ISDIGIT (*mangled))
+  len = strtol (mangled, &endptr, 10);
+
+  if (endptr == NULL || len <= 0)
+    return NULL;
+
+  /* In template parameter symbols, the first character of the mangled
+     name can be a digit.  This causes ambiguity issues because the
+     digits of the two numbers are adjacent.  */
+  if (kind == dlang_template_param)
     {
-      char *endptr;
-      long i = strtol (mangled, &endptr, 10);
+      const char *pstart = mangled;
+      char *pend = endptr;
+      long psize = len;
+      int saved = string_length (decl);
+
+      /* First handle any overflow.  */
+      while (strlen (endptr) < (size_t) len)
+	{
+	  len /= 10;
+	  endptr--;
+	}
 
-      if (endptr == NULL || i <= 0 || strlen (endptr) < (size_t) i)
+      /* Work backwards until a match is found.  */
+      for (; pend >= pstart; pend--)
+	{
+	  mangled = pend;
+
+	  /* Reached the beginning of the pointer to the name length,
+	     try parsing the entire symbol.  */
+	  if (psize == 0)
+	    {
+	      psize = len;
+	      pend = endptr;
+	    }
+
+	  /* Check whether template parameter is a function with a valid
+	     return type or an untyped identifier.  */
+	  if (ISDIGIT (*mangled))
+	    mangled = dlang_parse_symbol (decl, mangled, dlang_template_ident);
+	  else if (strncmp (mangled, "_D", 2) == 0)
+	    {
+	      mangled += 2;
+	      mangled = dlang_parse_symbol (decl, mangled, dlang_function);
+	    }
+
+	  /* Check for name length mismatch.  */
+	  if (mangled && (mangled - pend) == psize)
+	    return mangled;
+
+	  psize /= 10;
+	  string_setlength (decl, saved);
+	}
+
+      return NULL;
+    }
+  else
+    {
+      if (strlen (endptr) < (size_t) len)
 	return NULL;
 
       mangled = endptr;
 
       /* May be a template instance.  */
-      if (i >= 5 && strncmp (mangled, "__T", 3) == 0)
+      if (len >= 5 && strncmp (mangled, "__T", 3) == 0)
 	{
 	  /* Template symbol.  */
 	  if (ISDIGIT (mangled[3]) && mangled[3] != '0')
-	    return dlang_parse_template (decl, mangled, i);
+	    return dlang_parse_template (decl, mangled, len);
 
 	  return NULL;
 	}
 
-      switch (i)
+      switch (len)
 	{
 	case 6:
-	  if (strncmp (mangled, "__ctor", i) == 0)
+	  if (strncmp (mangled, "__ctor", len) == 0)
 	    {
 	      /* Constructor symbol for a class/struct.  */
 	      string_append (decl, "this");
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
-	  else if (strncmp (mangled, "__dtor", i) == 0)
+	  else if (strncmp (mangled, "__dtor", len) == 0)
 	    {
 	      /* Destructor symbol for a class/struct.  */
 	      string_append (decl, "~this");
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
-	  else if (strncmp (mangled, "__initZ", i+1) == 0)
+	  else if (strncmp (mangled, "__initZ", len+1) == 0)
 	    {
 	      /* The static initialiser for a given symbol.  */
 	      string_append (decl, "init$");
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
-	  else if (strncmp (mangled, "__vtblZ", i+1) == 0)
+	  else if (strncmp (mangled, "__vtblZ", len+1) == 0)
 	    {
 	      /* The vtable symbol for a given class.  */
 	      string_prepend (decl, "vtable for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 
 	case 7:
-	  if (strncmp (mangled, "__ClassZ", i+1) == 0)
+	  if (strncmp (mangled, "__ClassZ", len+1) == 0)
 	    {
 	      /* The classinfo symbol for a given class.  */
 	      string_prepend (decl, "ClassInfo for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 
 	case 10:
-	  if (strncmp (mangled, "__postblitMFZ", i+3) == 0)
+	  if (strncmp (mangled, "__postblitMFZ", len+3) == 0)
 	    {
 	      /* Postblit symbol for a struct.  */
 	      string_append (decl, "this(this)");
-	      mangled += i + 3;
+	      mangled += len + 3;
 	      return mangled;
 	    }
 	  break;
 
 	case 11:
-	  if (strncmp (mangled, "__InterfaceZ", i+1) == 0)
+	  if (strncmp (mangled, "__InterfaceZ", len+1) == 0)
 	    {
 	      /* The interface symbol for a given class.  */
 	      string_prepend (decl, "Interface for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 
 	case 12:
-	  if (strncmp (mangled, "__ModuleInfoZ", i+1) == 0)
+	  if (strncmp (mangled, "__ModuleInfoZ", len+1) == 0)
 	    {
 	      /* The ModuleInfo symbol for a given module.  */
 	      string_prepend (decl, "ModuleInfo for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 	}
 
-      string_appendn (decl, mangled, i);
-      mangled += i;
+      string_appendn (decl, mangled, len);
+      mangled += len;
     }
-  else
-    return NULL;
 
   return mangled;
 }
@@ -1274,25 +1344,38 @@ dlang_call_convention_p (const char *mangled)
 /* Extract and demangle the symbol in MANGLED and append it to DECL.
    Returns the remaining signature on success or NULL on failure.  */
 static const char *
-dlang_parse_symbol (string *decl, const char *mangled)
+dlang_parse_symbol (string *decl, const char *mangled,
+		    enum dlang_symbol_kinds kind)
 {
+  int saved;
   size_t n = 0;
   do
     {
       if (n++)
 	string_append (decl, ".");
 
-      mangled = dlang_identifier (decl, mangled);
+      mangled = dlang_identifier (decl, mangled, kind);
 
       if (mangled && dlang_call_convention_p (mangled))
 	{
 	  string mods;
-	  int saved;
+	  const char *start = NULL;
+	  int checkpoint = 0;
 
 	  /* Skip over 'this' parameter.  */
 	  if (*mangled == 'M')
 	    mangled++;
 
+	  /* We have reached here because we expect an extern(Pascal) function.
+	     However this is so rare, that it is more likely a template value
+	     parameter.  Since this can't be assumed, first attempt parsing
+	     the symbol as a function, and then back out on failure.  */
+	  if (*mangled == 'V')
+	    {
+	      start = mangled;
+	      checkpoint = string_length (decl);
+	    }
+
 	  /* Save the type modifiers for appending at the end.  */
 	  string_init (&mods);
 	  mangled = dlang_type_modifiers (&mods, mangled);
@@ -1307,21 +1390,41 @@ dlang_parse_symbol (string *decl, const char *mangled)
 	  mangled = dlang_function_args (decl, mangled);
 	  string_append (decl, ")");
 
-	  /* Demangle the function return type as a kind of sanity test.  */
-	  if (mangled && !ISDIGIT (*mangled))
-	    {
-	      saved = string_length (decl);
-	      mangled = dlang_type (decl, mangled);
-	      string_setlength (decl, saved);
-	    }
-
 	  /* Add any const/immutable/shared modifier. */
 	  string_appendn (decl, mods.b, string_length (&mods));
 	  string_delete (&mods);
+
+	  if (mangled == NULL && checkpoint != 0)
+	    {
+	      mangled = start;
+	      string_setlength (decl, checkpoint);
+	    }
 	}
     }
   while (mangled && ISDIGIT (*mangled));
 
+  /* Only top-level symbols or function template parameters have
+     a type that needs checking.  */
+  if (kind == dlang_top_level || kind == dlang_function)
+    {
+      /* Artificial symbols end with 'Z' and have no type.  */
+      if (mangled && *mangled == 'Z')
+	mangled++;
+      else
+	{
+	  saved = string_length (decl);
+	  mangled = dlang_type (decl, mangled);
+	  string_setlength (decl, saved);
+	}
+
+      /* Check that the entire symbol was successfully demangled.  */
+      if (kind == dlang_top_level)
+	{
+	  if (mangled == NULL || *mangled != '\0')
+	    return NULL;
+	}
+    }
+
   return mangled;
 }
 
@@ -1373,7 +1476,7 @@ dlang_template_args (string *decl, const char *mangled)
 	{
 	case 'S': /* Symbol parameter.  */
 	  mangled++;
-	  mangled = dlang_parse_symbol (decl, mangled);
+	  mangled = dlang_parse_symbol (decl, mangled, dlang_template_param);
 	  break;
 	case 'T': /* Type parameter.  */
 	  mangled++;
@@ -1431,7 +1534,7 @@ dlang_parse_template (string *decl, const char *mangled, long len)
   mangled += 3;
 
   /* Template identifier.  */
-  mangled = dlang_identifier (decl, mangled);
+  mangled = dlang_identifier (decl, mangled, dlang_template_ident);
 
   /* Template arguments.  */
   string_append (decl, "!(");
@@ -1470,7 +1573,7 @@ dlang_demangle (const char *mangled, int option ATTRIBUTE_UNUSED)
     {
       mangled += 2;
 
-      if (dlang_parse_symbol (&decl, mangled) == NULL)
+      if (dlang_parse_symbol (&decl, mangled, dlang_top_level) == NULL)
 	string_delete (&decl);
     }
 
diff --git a/libiberty/testsuite/d-demangle-expected b/libiberty/testsuite/d-demangle-expected
index ae0e8d3..d88fb26 100644
--- a/libiberty/testsuite/d-demangle-expected
+++ b/libiberty/testsuite/d-demangle-expected
@@ -606,12 +606,12 @@ _D8demangle17__T4testS6symbolZv
 demangle.test!(symbol)
 #
 --format=dlang
-_D8demangle21__T4testS6symbol3fooZv
+_D8demangle23__T4testS116symbol3fooZv
 demangle.test!(symbol.foo)
 #
 --format=dlang
-_D8demangle25__T4testS6symbol3foo3barZv
-demangle.test!(symbol.foo.bar)
+_D8demangle32__T4testS20_D6symbol3foo3barFZvZv
+demangle.test!(symbol.foo.bar())
 #
 --format=dlang
 _D8demangle19__T4testTaS6symbolZv
@@ -888,19 +888,19 @@ _D6plugin8generateFiiZAOa
 plugin.generate(int, int)
 #
 --format=dlang
-_D8demangle3fnAFZv3fnBMFZv
+_D8demangle3fnAFZ3fnBMFZv
 demangle.fnA().fnB()
 #
 --format=dlang
-_D8demangle4mainFZv1S3fnCFZv
+_D8demangle4mainFZ1S3fnCMFZv
 demangle.main().S.fnC()
 #
 --format=dlang
-_D8demangle4mainFZv1S3fnDMFZv
+_D8demangle4mainFZ1S3fnDMFZv
 demangle.main().S.fnD()
 #
 --format=dlang
-_D8demangle4mainFZv5localMFZi
+_D8demangle4mainFZ5localMFZi
 demangle.main().local()
 #
 --format=dlang
@@ -944,7 +944,7 @@ _D6object14TypeInfo_Array8argTypesMFNbNfJC8TypeInfoJC8TypeInfoZi
 object.TypeInfo_Array.argTypes(out TypeInfo, out TypeInfo)
 #
 --format=dlang
-_D2rt6dmain211_d_run_mainUiPPaPUAAaZiZi7tryExecMFMDFZvZv
+_D2rt6dmain211_d_run_mainUiPPaPUAAaZiZ7tryExecMFMDFZvZv
 rt.dmain2._d_run_main(int, char**, extern(C) int(char[][]) function*).tryExec(scope void() delegate)
 #
 --format=dlang
@@ -1000,13 +1000,37 @@ _D2gc11gctemplates56__T8mkBitmapTS3std5range13__T4iotaTiTiZ4iotaFiiZ6ResultZ8mkB
 gc.gctemplates.mkBitmap!(std.range.iota!(int, int).iota(int, int).Result).mkBitmap(ulong*, ulong)
 #
 --format=dlang
-_D8serenity9persister6Sqlite70__T15SqlitePersisterTS8serenity9persister6Sqlite11__unittest6FZv4TestZ15SqlitePersister12__T7opIndexZ7opIndexMFmZS8serenity9persister6Sqlite11__unittest6FZv4Test
+_D8serenity9persister6Sqlite69__T15SqlitePersisterTS8serenity9persister6Sqlite11__unittest6FZ4TestZ15SqlitePersister12__T7opIndexZ7opIndexMFmZS8serenity9persister6Sqlite11__unittest6FZ4Test
 serenity.persister.Sqlite.SqlitePersister!(serenity.persister.Sqlite.__unittest6().Test).SqlitePersister.opIndex!().opIndex(ulong)
 #
 --format=dlang
-_D4test4mainFZv5localMFZi
+_D3std11parallelism273__T4TaskS213std11parallelism3runTDFS3std9algorithm87__T9MapResultS27_D4test4mainFZ7getTermMFiZeTS3std5range13__T4iotaTiTiZ4iotaFiiZ6ResultZ9MapResultmmZeTS3std9algorithm87__T9MapResultS27_D4test4mainFZ7getTermMFiZeTS3std5range13__T4iotaTiTiZ4iotaFiiZ6ResultZ9MapResultTmTmZ4Task4implFPvZv
+std.parallelism.Task!(std.parallelism.run, real(std.algorithm.MapResult!(test.main().getTerm(int), std.range.iota!(int, int).iota(int, int).Result).MapResult, ulong, ulong) delegate, std.algorithm.MapResult!(test.main().getTerm(int), std.range.iota!(int, int).iota(int, int).Result).MapResult, ulong, ulong).Task.impl(void*)
+#
+--format=dlang
+_D4test4mainFZ5localMFZi
 test.main().local()
 #
 --format=dlang
 _D3std6socket12InternetHost221__T13getHostNoSyncVAyaa96_0a09202020206175746f2078203d2068746f6e6c28706172616d293b0a09202020206175746f206865203d20676574686f73746279616464722826782c20342c206361737428696e74294164647265737346616d696c792e494e4554293b0a09TkZ13getHostNoSyncMFkZb
 std.socket.InternetHost.getHostNoSync!("\n\t    auto x = htonl(param);\n\t    auto he = gethostbyaddr(&x, 4, cast(int)AddressFamily.INET);\n\t", uint).getHostNoSync(uint)
+#
+--format=dlang
+_D2rt5minfo16__unittestL518_6FZ12UTModuleInfo6__ctorMFNckZS2rt5minfo16__unittestL518_6FZ12UTModuleInfo
+rt.minfo.__unittestL518_6().UTModuleInfo.this(uint)
+#
+--format=dlang
+_D3std6traits37__T7fqnTypeTC6ObjectVbi0Vbi0Vbi0Vbi0Z13addQualifiersFAyabbbbZAya
+std.traits.fqnType!(Object, false, false, false, false).addQualifiers(immutable(char)[], bool, bool, bool, bool)
+#
+--format=dlang
+_D3std9algorithm117__T9MapResultS153std5range4onlyTS3std9algorithm53__T12FilterResultS28_D3std3uni7isUpperFNaNbNfwZbTAyaZ12FilterResultZ9MapResult5frontMFNaNdNfZS3std5range22__T10OnlyResultTwVmi1Z10OnlyResult
+std.algorithm.MapResult!(std.range.only, std.algorithm.FilterResult!(std.uni.isUpper(dchar), immutable(char)[]).FilterResult).MapResult.front()
+#
+--format=dlang
+_D3std6traits17__T6fqnSymS43stdZ11adjustIdentFAyaZAya
+std.traits.fqnSym!(std).adjustIdent(immutable(char)[])
+#
+--format=dlang
+_D2rt8lifetime36__T14_d_newarrayOpTS13_d_newarrayiTZ14_d_newarrayOpTFNaNbxC8TypeInfomPmZAv
+rt.lifetime._d_newarrayOpT!(_d_newarrayiT)._d_newarrayOpT(const(TypeInfo), ulong, ulong*)
-- 
2.1.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]