This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH 02/10] Add JSON implementation

From: David Malcolm <dmalcolm at redhat dot com>
To: gcc-patches at gcc dot gnu dot org
Cc: David Malcolm <dmalcolm at redhat dot com>
Date: Tue, 29 May 2018 16:41:15 -0400
Subject: [PATCH 02/10] Add JSON implementation
References: <1527626483-4723-1-git-send-email-dmalcolm@redhat.com>
This patch is the JSON patch I posted last year;
it adds support to gcc for reading and writing JSON,
based on DOM-like trees of json::value instances.

This is overkill for what's needed by the rest of the
patch kit (which just needs to be able to write JSON),
but this code already existed, so I'm using it for now.

gcc/ChangeLog:
	* Makefile.in (OBJS): Add json.o.
	* json.cc: New file.
	* json.h: New file.
	* selftest-run-tests.c (selftest::run_tests): Call json_cc_tests.
	* selftest.h (selftest::json_cc_tests): New decl.
---
 gcc/Makefile.in          |    1 +
 gcc/json.cc              | 1914 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/json.h               |  214 ++++++
 gcc/selftest-run-tests.c |    1 +
 gcc/selftest.h           |    1 +
 5 files changed, 2131 insertions(+)
 create mode 100644 gcc/json.cc
 create mode 100644 gcc/json.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 20bee04..b3c7d5d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1385,6 +1385,7 @@ OBJS = \
 	ira-color.o \
 	ira-emit.o \
 	ira-lives.o \
+	json.o \
 	jump.o \
 	langhooks.o \
 	lcm.o \
diff --git a/gcc/json.cc b/gcc/json.cc
new file mode 100644
index 0000000..e0d5a76
--- /dev/null
+++ b/gcc/json.cc
@@ -0,0 +1,1914 @@
+/* JSON parsing
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "json.h"
+#include "pretty-print.h"
+#include "math.h"
+#include "selftest.h"
+
+using namespace json;
+
+/* class json::value.  */
+
+/* Generate a char * for this json::value tree.
+   The returned value must be freed by the caller.  */
+
+char *
+value::to_str () const
+{
+  pretty_printer pp;
+  print (&pp);
+  return xstrdup (pp_formatted_text (&pp));
+}
+
+/* Dump this json::value tree to OUTF.
+   No formatting is done.  There are no guarantees about the order
+   in which the key/value pairs of json::objects are printed.  */
+
+void
+value::dump (FILE *outf) const
+{
+  pretty_printer pp;
+  pp_buffer (&pp)->stream = outf;
+  print (&pp);
+  pp_flush (&pp);
+}
+
+/* If this json::value is a json::object, return it,
+   otherwise return NULL.  */
+
+const object *
+value::as_object () const
+{
+  if (get_kind () != JSON_OBJECT)
+    return NULL;
+  return static_cast <const object *> (this);
+}
+
+/* If this json::value is a json::array, return it,
+   otherwise return NULL.  */
+
+const array *
+value::as_array () const
+{
+  if (get_kind () != JSON_ARRAY)
+    return NULL;
+  return static_cast <const array *> (this);
+}
+
+/* If this json::value is a json::number, return it,
+   otherwise return NULL.  */
+
+const number *
+value::as_number () const
+{
+  if (get_kind () != JSON_NUMBER)
+    return NULL;
+  return static_cast <const number *> (this);
+}
+
+/* If this json::value is a json::string, return it,
+   otherwise return NULL.  */
+
+const string *
+value::as_string () const
+{
+  if (get_kind () != JSON_STRING)
+    return NULL;
+  return static_cast <const string *> (this);
+}
+
+/* Attempt to get the value of a key/value pair from this value
+   as if THIS value were an object.
+
+   If THIS is not a json::object, return write an error message to OUT_ERR
+   (which must be freed by the caller) and return false.
+
+   Otherwise write the value ptr (possibly NULL) to OUT_VALUE and
+   return true.  */
+
+bool
+value::get_optional_value_by_key (const char *name, const value *&out_value,
+				  char *&out_err) const
+{
+  const json::object *obj = as_object ();
+  if (!obj)
+    {
+      out_err = xstrdup ("not an object");
+      return false;
+    }
+  out_value = obj->get (name);
+  return true;
+}
+
+/* Attempt to get a string value of a key/value pair from this value
+   as if THIS value were an object.
+
+   If THIS is a json::object, and KEY is either not present, is a string,
+   or is the "null" JSON literal, then return true, and write to OUT_VALUE.
+   If a string, then the ptr is written to OUT_VALUE, otherwise NULL
+   is written to OUT_VALUE.
+
+   If THIS is not a json::object, or KEY is not a string/"null",
+   return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+value::get_optional_string_by_key (const char *name, const char *&out_value,
+				   char *&out_err) const
+{
+  const json::value *v;
+  if (!get_optional_value_by_key (name, v, out_err))
+    return false;
+  if (v && v->get_kind () != JSON_NULL)
+    {
+      const json::string *s = v->as_string ();
+      if (!s)
+	{
+	  out_err = xasprintf ("not a string: \"%s\"", name);
+	  return false;
+	}
+      out_value = s->get_string ();
+      return true;
+    }
+  else
+    {
+      out_value = NULL;
+      return true;
+    }
+}
+
+/* Attempt to get lookup the value of a key/value pair from this value
+   as if this value were an object.
+
+   To succeed, THIS must be a json::object, and it must have a key named
+   NAME.
+
+   On success, return true and write the value to OUT_VALUE.
+   On failure, return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+value::get_value_by_key (const char *name, const value *&out_value,
+			 char *&out_err) const
+{
+  const json::object *obj = as_object ();
+  if (!obj)
+    {
+      out_err = xstrdup ("not an object");
+      return false;
+    }
+  const json::value *v = obj->get (name);
+  if (!v)
+    {
+      out_err = xasprintf ("missing attribute: \"%s\"", name);
+      return false;
+    }
+  out_value = v;
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a number;
+   if successful, write it as an int to OUT_VALUE.  */
+
+bool
+value::get_int_by_key (const char *name, int &out_value, char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::number *n = v->as_number ();
+  if (!n)
+    {
+      out_err = xasprintf ("not a number: \"%s\"", name);
+      return false;
+    }
+  out_value = n->get ();
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a string;
+   if successful, write it as const char * to OUT_VALUE.  */
+
+bool
+value::get_string_by_key (const char *name, const char *&out_value,
+			  char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::string *s = v->as_string ();
+  if (!s)
+    {
+      out_err = xasprintf ("not a string: \"%s\"", name);
+      return false;
+    }
+  out_value = s->get_string ();
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be an array;
+   if successful, write it as a json::array * to OUT_VALUE.  */
+
+bool
+value::get_array_by_key (const char *name, const array *&out_value,
+			 char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::array *arr = v->as_array ();
+  if (!arr)
+    {
+      out_err = xasprintf ("not an array: \"%s\"", name);
+      return false;
+    }
+  out_value = arr;
+  return true;
+}
+
+/* class json::object, a subclass of json::value, representing
+   an unordered collection of key/value pairs.  */
+
+/* json:object's dtor.  */
+
+object::~object ()
+{
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      free (const_cast <char *>((*it).first));
+      delete ((*it).second);
+    }
+}
+
+/* Implementation of json::value::print for json::object.  */
+
+void
+object::print (pretty_printer *pp) const
+{
+  /* Note that the order is not guaranteed.  */
+  pp_character (pp, '{');
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      if (it != m_map.begin ())
+	pp_string (pp, ", ");
+      const char *key = const_cast <char *>((*it).first);
+      value *value = (*it).second;
+      pp_printf (pp, "\"%s\": ", key); // FIXME: escaping?
+      value->print (pp);
+    }
+  pp_character (pp, '}');
+}
+
+/* Implementation of json::value::clone for json::object.  */
+
+value *
+object::clone () const
+{
+  object *other = new object ();
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      const char *key = const_cast <char *>((*it).first);
+      value *value = (*it).second;
+      other->set (key, value->clone ());
+    }
+  return other;
+}
+
+/* Get the json::value * for KEY, or NULL if the key is not present.  */
+
+value *
+object::get (const char *key) const
+{
+  value **slot = const_cast <object*> (this)->m_map.get (key);
+  if (slot)
+    return *slot;
+  return NULL;
+}
+
+/* As object::get (KEY), but return NULL if the value of the key
+   is the "null" JSON literal.  */
+
+value *
+object::get_if_nonnull (const char *key) const
+{
+  value *result = get (key);
+  if (!result)
+    return NULL;
+  if (result->get_kind () == JSON_NULL)
+    return NULL;
+  return result;
+}
+
+/* Set the json::value * for KEY, taking ownership of VALUE
+   (and taking a copy of KEY if necessary).  */
+
+void
+object::set (const char *key, value *v)
+{
+  value **ptr = m_map.get (key);
+  if (ptr)
+    {
+      /* If the key is already present, delete the existing value
+	 and overwrite it.  */
+      delete *ptr;
+      *ptr = v;
+    }
+  else
+    /* If the key wasn't already present, take a copy of the key,
+       and store the value.  */
+    m_map.put (xstrdup (key), v);
+}
+
+/* class json::array, a subclass of json::value, representing
+   an ordered collection of values.  */
+
+/* json::array's dtor.  */
+
+array::~array ()
+{
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    delete v;
+}
+
+/* Implementation of json::value::print for json::array.  */
+
+void
+array::print (pretty_printer *pp) const
+{
+  pp_character (pp, '[');
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    {
+      if (i)
+	pp_string (pp, ", ");
+      v->print (pp);
+    }
+  pp_character (pp, ']');
+}
+
+/* Implementation of json::value::clone for json::array.  */
+
+value *
+array::clone () const
+{
+  array *other = new array ();
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    other->append (v->clone ());
+  return other;
+}
+
+/* class json::number, a subclass of json::value, wrapping a double.  */
+
+/* Implementation of json::value::print for json::number.  */
+
+void
+number::print (pretty_printer *pp) const
+{
+  char tmp[1024];
+  snprintf (tmp, sizeof (tmp), "%g", m_value);
+  pp_string (pp, tmp);
+}
+
+/* Implementation of json::value::clone for json::number.  */
+
+value *
+number::clone () const
+{
+  return new number (m_value);
+}
+
+/* class json::string, a subclass of json::value.  */
+
+void
+string::print (pretty_printer *pp) const
+{
+  pp_character (pp, '"');
+  for (const char *ptr = m_utf8; *ptr; ptr++)
+    {
+      char ch = *ptr;
+      switch (ch)
+	{
+	case '"':
+	  pp_string (pp, "\\\"");
+	  break;
+	case '\\':
+	  pp_string (pp, "\\n");
+	  break;
+	case '\b':
+	  pp_string (pp, "\\b");
+	  break;
+	case '\f':
+	  pp_string (pp, "\\f");
+	  break;
+	case '\n':
+	  pp_string (pp, "\\n");
+	  break;
+	case '\r':
+	  pp_string (pp, "\\r");
+	  break;
+	case '\t':
+	  pp_string (pp, "\\t");
+	  break;
+
+	default:
+	  pp_character (pp, ch);
+	}
+    }
+  pp_character (pp, '"');
+}
+
+/* Implementation of json::value::clone for json::string.  */
+
+value *
+string::clone () const
+{
+  return new string (m_utf8);
+}
+
+/* class json::literal, a subclass of json::value.  */
+
+/* Implementation of json::value::print for json::literal.  */
+
+void
+literal::print (pretty_printer *pp) const
+{
+  switch (m_kind)
+    {
+    case JSON_TRUE:
+      pp_string (pp, "true");
+      break;
+    case JSON_FALSE:
+      pp_string (pp, "false");
+      break;
+    case JSON_NULL:
+      pp_string (pp, "null");
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Implementation of json::value::clone for json::literal.  */
+
+value *
+literal::clone () const
+{
+  return new literal (m_kind);
+}
+
+
+/* Declarations relating to parsing JSON, all within an
+   anonymous namespace.  */
+
+namespace {
+
+/* A typedef representing a single unicode character.  */
+
+typedef unsigned unichar;
+
+/* An enum for discriminating different kinds of JSON token.  */
+
+enum token_id
+{
+  TOK_ERROR,
+
+  TOK_EOF,
+
+  /* Punctuation.  */
+  TOK_OPEN_SQUARE,
+  TOK_OPEN_CURLY,
+  TOK_CLOSE_SQUARE,
+  TOK_CLOSE_CURLY,
+  TOK_COLON,
+  TOK_COMMA,
+
+  /* Literal names.  */
+  TOK_TRUE,
+  TOK_FALSE,
+  TOK_NULL,
+
+  TOK_STRING,
+  TOK_NUMBER
+};
+
+/* Human-readable descriptions of enum token_id.  */
+
+static const char *token_id_name[] = {
+  "error",
+  "EOF",
+  "'['",
+  "'{'",
+  "']'",
+  "'}'",
+  "':'",
+  "','",
+  "'true'",
+  "'false'",
+  "'null'",
+  "string",
+  "number"
+};
+
+/* Tokens within the JSON lexer.  */
+
+struct token
+{
+  /* The kind of token.  */
+  enum token_id id;
+
+  /* The location of this token within the unicode
+     character stream.  */
+  int index;
+
+  union
+  {
+    /* Value for TOK_ERROR and TOK_STRING.  */
+    char *string;
+
+    /* Value for TOK_NUMBER.  */
+    double number;
+  } u;
+};
+
+/* A class for lexing JSON.  */
+
+class lexer
+{
+ public:
+  lexer ();
+  ~lexer ();
+  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
+
+  const token *peek ();
+  void consume ();
+
+ private:
+  bool get_char (unichar &out);
+  void unget_char ();
+  static void dump_token (FILE *outf, const token *tok);
+  void lex_token (token *out);
+  void lex_string (token *out);
+  void lex_number (token *out, unichar first_char);
+  bool rest_of_literal (const char *suffix);
+
+ private:
+  auto_vec<unichar> m_buffer;
+  int m_next_char_idx;
+
+  static const int MAX_TOKENS = 1;
+  token m_next_tokens[MAX_TOKENS];
+  int m_num_next_tokens;
+};
+
+/* A class for parsing JSON.  */
+
+class parser
+{
+ public:
+  parser (char **err_out);
+  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
+  value *parse_value (int depth);
+  object *parse_object (int depth);
+  array *parse_array (int depth);
+
+  bool seen_error_p () const { return *m_err_out; }
+  void require_eof ();
+
+ private:
+  void require (enum token_id tok_id);
+  void error_at (int, const char *, ...) ATTRIBUTE_PRINTF_3;
+
+ private:
+  lexer m_lexer;
+  char **m_err_out;
+};
+
+} // anonymous namespace for parsing implementation
+
+/* Parser implementation.  */
+
+/* lexer's ctor.  */
+
+lexer::lexer ()
+: m_buffer (), m_next_char_idx (0), m_num_next_tokens (0)
+{
+}
+
+/* lexer's dtor.  */
+
+lexer::~lexer ()
+{
+  while (m_num_next_tokens > 0)
+    consume ();
+}
+
+/* Peek the next token.  */
+
+const token *
+lexer::peek ()
+{
+  if (m_num_next_tokens == 0)
+    {
+      lex_token (&m_next_tokens[0]);
+      m_num_next_tokens++;
+    }
+  return &m_next_tokens[0];
+}
+
+/* Consume the next token.  */
+
+void
+lexer::consume ()
+{
+  if (m_num_next_tokens == 0)
+    peek ();
+
+  gcc_assert (m_num_next_tokens > 0);
+  gcc_assert (m_num_next_tokens <= MAX_TOKENS);
+
+  if (0)
+    {
+      fprintf (stderr, "consuming token: ");
+      dump_token (stderr, &m_next_tokens[0]);
+      fprintf (stderr, "\n");
+    }
+
+  if (m_next_tokens[0].id == TOK_ERROR
+      || m_next_tokens[0].id == TOK_STRING)
+    free (m_next_tokens[0].u.string);
+
+  m_num_next_tokens--;
+  memmove (&m_next_tokens[0], &m_next_tokens[1],
+	   sizeof (token) * m_num_next_tokens);
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
+   buffer.  */
+
+bool
+lexer::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  /* FIXME: adapted from charset.c:one_utf8_to_cppchar.  */
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+  const uchar *inbuf = (const unsigned char *) (utf8_buf);
+  const uchar **inbufp = &inbuf;
+  size_t *inbytesleftp = &length;
+
+  while (length > 0)
+    {
+      unichar c;
+      const uchar *inbuf = *inbufp;
+      size_t nbytes, i;
+
+      c = *inbuf;
+      if (c < 0x80)
+	{
+	  m_buffer.safe_push (c);
+	  *inbytesleftp -= 1;
+	  *inbufp += 1;
+	  continue;
+	}
+
+      /* The number of leading 1-bits in the first byte indicates how many
+	 bytes follow.  */
+      for (nbytes = 2; nbytes < 7; nbytes++)
+	if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
+	  goto found;
+      *err_out = xstrdup ("ill-formed UTF-8 sequence");
+      return false;
+    found:
+
+      if (*inbytesleftp < nbytes)
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8 sequence");
+	  return false;
+	}
+
+      c = (c & masks[nbytes-1]);
+      inbuf++;
+      for (i = 1; i < nbytes; i++)
+	{
+	  unichar n = *inbuf++;
+	  if ((n & 0xC0) != 0x80)
+	    {
+	      *err_out = xstrdup ("ill-formed UTF-8 sequence");
+	      return false;
+	    }
+	  c = ((c << 6) + (n & 0x3F));
+	}
+
+      /* Make sure the shortest possible encoding was used.  */
+      if ((   c <=      0x7F && nbytes > 1)
+	  || (c <=     0x7FF && nbytes > 2)
+	  || (c <=    0xFFFF && nbytes > 3)
+	  || (c <=  0x1FFFFF && nbytes > 4)
+	  || (c <= 0x3FFFFFF && nbytes > 5))
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8:"
+			      " shortest possible encoding not used");
+	  return false;
+	}
+
+      /* Make sure the character is valid.  */
+      if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8: invalid character");
+	  return false;
+	}
+
+      m_buffer.safe_push (c);
+      *inbufp = inbuf;
+      *inbytesleftp -= nbytes;
+    }
+  return true;
+}
+
+/* Attempt to get the next unicode character from this lexer's buffer.
+   If successful, write it to OUT and return true.
+   Otherwise, return false.  */
+
+bool
+lexer::get_char (unichar &out)
+{
+  if (m_next_char_idx >= (int)m_buffer.length ())
+    return false;
+
+  out = m_buffer[m_next_char_idx++];
+  return true;
+}
+
+/* FIXME.  */
+
+void
+lexer::unget_char ()
+{
+  --m_next_char_idx;
+}
+
+/* Print a textual representation of TOK to OUTF.
+   This is intended for debugging the lexer and parser,
+   rather than for user-facing output.  */
+
+void
+lexer::dump_token (FILE *outf, const token *tok)
+{
+  switch (tok->id)
+    {
+    case TOK_ERROR:
+      fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
+      break;
+
+    case TOK_EOF:
+      fprintf (outf, "TOK_EOF");
+      break;
+
+    case TOK_OPEN_SQUARE:
+      fprintf (outf, "TOK_OPEN_SQUARE");
+      break;
+
+    case TOK_OPEN_CURLY:
+      fprintf (outf, "TOK_OPEN_CURLY");
+      break;
+
+    case TOK_CLOSE_SQUARE:
+      fprintf (outf, "TOK_CLOSE_SQUARE");
+      break;
+
+    case TOK_CLOSE_CURLY:
+      fprintf (outf, "TOK_CLOSE_CURLY");
+      break;
+
+    case TOK_COLON:
+      fprintf (outf, "TOK_COLON");
+      break;
+
+    case TOK_COMMA:
+      fprintf (outf, "TOK_COMMA");
+      break;
+
+    case TOK_TRUE:
+      fprintf (outf, "TOK_TRUE");
+      break;
+
+    case TOK_FALSE:
+      fprintf (outf, "TOK_FALSE");
+      break;
+
+    case TOK_NULL:
+      fprintf (outf, "TOK_NULL");
+      break;
+
+    case TOK_STRING:
+      fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
+      break;
+
+    case TOK_NUMBER:
+      fprintf (outf, "TOK_NUMBER (%f)", tok->u.number);
+      break;
+
+    default:
+      gcc_unreachable ();
+      break;
+    }
+}
+
+/* Attempt to lex the input buffer, writing the next token to OUT.
+   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.  */
+
+void
+lexer::lex_token (token *out)
+{
+  /* Skip to next non-whitespace char.  */
+  unichar next_char;
+  while (1)
+    {
+      out->index = m_next_char_idx;
+      if (!get_char (next_char))
+	{
+	  out->id = TOK_EOF;
+	  return;
+	}
+      if (next_char != ' '
+	  && next_char != '\t'
+	  && next_char != '\n'
+	  && next_char != '\r')
+	break;
+    }
+
+  switch (next_char)
+    {
+    case '[':
+      out->id = TOK_OPEN_SQUARE;
+      break;
+
+    case '{':
+      out->id = TOK_OPEN_CURLY;
+      break;
+
+    case ']':
+      out->id = TOK_CLOSE_SQUARE;
+      break;
+
+    case '}':
+      out->id = TOK_CLOSE_CURLY;
+      break;
+
+    case ':':
+      out->id = TOK_COLON;
+      break;
+
+    case ',':
+      out->id = TOK_COMMA;
+      break;
+
+    case '"':
+      lex_string (out);
+      break;
+
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      lex_number (out, next_char);
+      break;
+
+    case 't':
+      /* Handle literal "true".  */
+      if (rest_of_literal ("rue"))
+	{
+	  out->id = TOK_TRUE;
+	  break;
+	}
+      else
+	goto err;
+
+    case 'f':
+      /* Handle literal "false".  */
+      if (rest_of_literal ("alse"))
+	{
+	  out->id = TOK_FALSE;
+	  break;
+	}
+      else
+	goto err;
+
+    case 'n':
+      /* Handle literal "null".  */
+      if (rest_of_literal ("ull"))
+	{
+	  out->id = TOK_NULL;
+	  break;
+	}
+      else
+	goto err;
+
+    err:
+    default:
+      out->id = TOK_ERROR;
+      out->u.string = xasprintf ("unexpected character: %c", next_char);
+      break;
+    }
+}
+
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+   to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR)
+   if an error occurred.
+   (ECMA-404 section 9; RFC 7159 section 7).  */
+
+void
+lexer::lex_string (token *out)
+{
+  auto_vec<unichar> content;
+  bool still_going = true;
+  while (still_going)
+    {
+      unichar uc;
+      if (!get_char (uc))
+	{
+	  out->id = TOK_ERROR;
+	  out->u.string = xstrdup ("EOF within string");
+	  return;
+	}
+      switch (uc)
+	{
+	case '"':
+	  still_going = false;
+	  break;
+	case '\\':
+	  {
+	    unichar next_char;
+	    if (!get_char (next_char))
+	      {
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("EOF within string");;
+		return;
+	      }
+	    switch (next_char)
+	      {
+	      case '"':
+	      case '\\':
+	      case '/':
+		content.safe_push (next_char);
+		break;
+
+	      case 'b':
+		content.safe_push ('\b');
+		break;
+
+	      case 'f':
+		content.safe_push ('\f');
+		break;
+
+	      case 'n':
+		content.safe_push ('\n');
+		break;
+
+	      case 'r':
+		content.safe_push ('\r');
+		break;
+
+	      case 't':
+		content.safe_push ('\t');
+		break;
+
+	      case 'u':
+		{
+		  unichar result = 0;
+		  for (int i = 0; i < 4; i++)
+		    {
+		      unichar hexdigit;
+		      if (!get_char (hexdigit))
+			{
+			  out->id = TOK_ERROR;
+			  out->u.string = xstrdup ("EOF within string");
+			  return;
+			}
+		      result <<= 4;
+		      if (hexdigit >= '0' && hexdigit <= '9')
+			result += hexdigit - '0';
+		      else if (hexdigit >= 'a' && hexdigit <= 'f')
+			result += (hexdigit - 'a') + 10;
+		      else if (hexdigit >= 'A' && hexdigit <= 'F')
+			result += (hexdigit - 'A') + 10;
+		      else
+			{
+			  out->id = TOK_ERROR;
+			  out->u.string = xstrdup ("bogus hex char");
+			  return;
+			}
+		    }
+		  content.safe_push (result);
+		}
+		break;
+
+	      default:
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("unrecognized escape char");
+		return;
+	      }
+	  }
+	  break;
+
+	default:
+	  /* Reject unescaped control characters U+0000 through U+001F
+	     (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1).  */
+	  if (uc <= 0x1f)
+	    {
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("unescaped control char");
+		return;
+	    }
+
+	  /* Otherwise, add regular unicode code point.  */
+	  content.safe_push (uc);
+	  break;
+	}
+    }
+
+  out->id = TOK_STRING;
+
+  auto_vec<char> utf8_buf;
+  // FIXME: adapted from libcpp/charset.c:one_cppchar_to_utf8
+  for (unsigned i = 0; i < content.length (); i++)
+    {
+      static const uchar masks[6] =  { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+      static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
+      size_t nbytes;
+      uchar buf[6], *p = &buf[6];
+      unichar c = content[i];
+
+      nbytes = 1;
+      if (c < 0x80)
+	*--p = c;
+      else
+	{
+	  do
+	    {
+	      *--p = ((c & 0x3F) | 0x80);
+	      c >>= 6;
+	      nbytes++;
+	    }
+	  while (c >= 0x3F || (c & limits[nbytes-1]));
+	  *--p = (c | masks[nbytes-1]);
+	}
+
+      while (p < &buf[6])
+	utf8_buf.safe_push (*p++);
+    }
+
+  out->u.string = XNEWVEC (char, utf8_buf.length () + 1);
+  for (unsigned i = 0; i < utf8_buf.length (); i++)
+    out->u.string[i] = utf8_buf[i];
+  out->u.string[utf8_buf.length ()] = '\0';
+
+  // FIXME: leaks?  have a json_context do the allocation
+}
+
+/* Having consumed FIRST_CHAR, an initial digit or '-' character from
+   the lexer's buffer attempt to lex the rest of a JSON number, writing
+   the result to OUT (or TOK_ERROR) if an error occurred.
+   (ECMA-404 section 8; RFC 7159 section 6).  */
+
+void
+lexer::lex_number (token *out, unichar first_char)
+{
+  bool negate = false;
+  double value = 0.0;
+  if (first_char == '-')
+    {
+      negate = true;
+      if (!get_char (first_char))
+	{
+	  out->id = TOK_ERROR;
+	  out->u.string = xstrdup ("expected digit");
+	  return;
+	}
+    }
+
+  if (first_char == '0')
+    value = 0.0;
+  else if (!ISDIGIT (first_char))
+    {
+      out->id = TOK_ERROR;
+      out->u.string = xstrdup ("expected digit");
+      return;
+    }
+  else
+    {
+      /* Got a nonzero digit; expect zero or more digits.  */
+      value = first_char - '0';
+      while (1)
+	{
+	  unichar uc;
+	  if (!get_char (uc))
+	    break;
+	  if (ISDIGIT (uc))
+	    {
+	      value *= 10;
+	      value += uc -'0';
+	      continue;
+	    }
+	  else
+	    {
+	      unget_char ();
+	      break;
+	    }
+	}
+    }
+
+  /* Optional '.', followed by one or more decimals.  */
+  unichar next_char;
+  if (get_char (next_char))
+    {
+      if (next_char == '.')
+	{
+	  /* Parse decimal digits.  */
+	  bool had_digit = false;
+	  // FIXME: does this lose too much precision?
+	  double digit_factor = 0.1;
+	  while (get_char (next_char))
+	    {
+	      if (!ISDIGIT (next_char))
+		{
+		  unget_char ();
+		  break;
+		}
+	      value += (next_char - '0') * digit_factor;
+	      digit_factor *= 0.1;
+	      had_digit = true;
+	    }
+	  if (!had_digit)
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("expected digit");
+	      return;
+	    }
+	}
+      else
+	unget_char ();
+    }
+
+  /* Parse 'e' and 'E'.  */
+  unichar exponent_char;
+  if (get_char (exponent_char))
+    {
+      if (exponent_char == 'e' || exponent_char == 'E')
+	{
+	  /* Optional +/-.  */
+	  unichar sign_char;
+	  int exponent = 0;
+	  bool negate_exponent = false;
+	  bool had_exponent_digit = false;
+	  if (!get_char (sign_char))
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("EOF within exponent");
+	      return;
+	    }
+	  if (sign_char == '-')
+	    negate_exponent = true;
+	  else if (sign_char == '+')
+	    ;
+	  else if (ISDIGIT (sign_char))
+	    {
+	      exponent = sign_char - '0';
+	      had_exponent_digit = true;
+	    }
+	  else
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string
+		= xstrdup ("expected '-','+' or digit within exponent");
+	      return;
+	    }
+
+	  /* One or more digits (we might have seen the digit above,
+	     though).  */
+	  while (1)
+	    {
+	      unichar uc;
+	      if (!get_char (uc))
+		break;
+	      if (ISDIGIT (uc))
+		{
+		  exponent *= 10;
+		  exponent += uc -'0';
+		  had_exponent_digit = true;
+		  continue;
+		}
+	      else
+		{
+		  unget_char ();
+		  break;
+		}
+	    }
+	  if (!had_exponent_digit)
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("expected digit within exponent");
+	      return;
+	    }
+	  if (negate_exponent)
+	    exponent = -exponent;
+	  /* FIXME: better way to do this?  */
+	  value = value * pow (10, exponent);
+	}
+      else
+	unget_char ();
+    }
+
+  if (negate)
+    value = -value;
+
+  out->id = TOK_NUMBER;
+  out->u.number = value;
+}
+
+/* Determine if the next characters to be lexed match SUFFIX.
+   SUFFIX must be pure ASCII.
+   If so, consume the characters and return true.
+   Otherwise, return false.  */
+
+bool
+lexer::rest_of_literal (const char *suffix)
+{
+  int suffix_idx = 0;
+  int buf_idx = m_next_char_idx;
+  while (1)
+    {
+      if (suffix[suffix_idx] == '\0')
+	{
+	  m_next_char_idx += suffix_idx;
+	  return true;
+	}
+      if (buf_idx >= (int)m_buffer.length ())
+	return false;
+      /* This assumes that suffix is ASCII.  */
+      if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx])
+	return false;
+      buf_idx++;
+      suffix_idx++;
+    }
+}
+
+/* parser's ctor.  */
+
+parser::parser (char **err_out)
+: m_lexer (), m_err_out (err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+  *err_out = NULL;
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
+   lexer's buffer.  */
+
+bool
+parser::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  return m_lexer.add_utf8 (length, utf8_buf, err_out);
+}
+
+/* Parse a JSON value (object, array, number, string, or literal).
+   (ECMA-404 section 5; RFC 7159 section 3).  */
+
+value *
+parser::parse_value (int depth)
+{
+  const token *tok = m_lexer.peek ();
+
+  /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
+     states: "An implementation may set limits on the maximum depth
+     of nesting.".
+
+     Ideally we'd avoid this limit (e.g. by rewriting parse_value,
+     parse_object, and parse_array into a single function with a vec of
+     state).  */
+  const int MAX_DEPTH = 100;
+  if (depth >= MAX_DEPTH)
+    {
+      error_at (tok->index, "maximum nesting depth exceeded: %i", MAX_DEPTH);
+      return NULL;
+    }
+
+  switch (tok->id)
+    {
+    case TOK_OPEN_CURLY:
+      return parse_object (depth);
+
+    case TOK_STRING:
+      {
+	string *result = new string (tok->u.string);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_OPEN_SQUARE:
+      return parse_array (depth);
+
+    case TOK_NUMBER:
+      {
+	number *result = new number (tok->u.number);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_TRUE:
+      {
+	literal *result = new literal (JSON_TRUE);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_FALSE:
+      {
+	literal *result = new literal (JSON_FALSE);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_NULL:
+      {
+	literal *result = new literal (JSON_NULL);
+	m_lexer.consume ();
+	return result;
+      }
+
+    default:
+      error_at (tok->index, "unexpected token: %s", token_id_name[tok->id]);
+      return NULL;
+    }
+}
+
+/* Parse a JSON object.
+   (ECMA-404 section 6; RFC 7159 section 4).  */
+
+object *
+parser::parse_object (int depth)
+{
+  require (TOK_OPEN_CURLY);
+
+  object *result = new object ();
+
+  const token *tok = m_lexer.peek ();
+  if (tok->id == TOK_CLOSE_CURLY)
+    {
+      require (TOK_CLOSE_CURLY);
+      return result;
+    }
+  if (tok->id != TOK_STRING)
+    {
+      error_at (tok->index, "expected string for object key");
+      return result;
+    }
+  while (!seen_error_p ())
+    {
+      tok = m_lexer.peek ();
+      if (tok->id != TOK_STRING)
+	{
+	  error_at (tok->index, "expected string for object key");
+	  return result;
+	}
+      char *key = xstrdup (tok->u.string);
+      m_lexer.consume ();
+
+      require (TOK_COLON);
+
+      value *v = parse_value (depth + 1);
+      if (!v)
+	{
+	  free (key);
+	  return result;
+	}
+      /* We don't enforce uniqueness for keys.  */
+      result->set (key, v);
+      free (key);
+
+      tok = m_lexer.peek ();
+      if (tok->id == TOK_COMMA)
+	{
+	  m_lexer.consume ();
+	  continue;
+	}
+      else
+	{
+	  require (TOK_CLOSE_CURLY);
+	  break;
+	}
+    }
+  return result;
+}
+
+/* Parse a JSON array.
+   (ECMA-404 section 7; RFC 7159 section 5).  */
+
+array *
+parser::parse_array (int depth)
+{
+  require (TOK_OPEN_SQUARE);
+
+  array *result = new array ();
+
+  const token *tok = m_lexer.peek ();
+  if (tok->id == TOK_CLOSE_SQUARE)
+    {
+      m_lexer.consume ();
+      return result;
+    }
+
+  while (!seen_error_p ())
+    {
+      value *v = parse_value (depth + 1);
+      if (!v)
+	return result;
+
+      result->append (v);
+
+      tok = m_lexer.peek ();
+      if (tok->id == TOK_COMMA)
+	{
+	  m_lexer.consume ();
+	  continue;
+	}
+      else
+	{
+	  require (TOK_CLOSE_SQUARE);
+	  break;
+	}
+    }
+
+  return result;
+}
+
+/* Require an EOF, or fail if there is surplus input.  */
+
+void
+parser::require_eof ()
+{
+  require (TOK_EOF);
+}
+
+/* Consume the next token, issuing an error if it is not of kind TOK_ID.  */
+
+void
+parser::require (enum token_id tok_id)
+{
+  const token *tok = m_lexer.peek ();
+  if (tok->id != tok_id)
+    {
+      if (tok->id == TOK_ERROR)
+	error_at (tok->index, "expected %s; got bad token: %s",
+		  token_id_name[tok_id], tok->u.string);
+      else
+	error_at (tok->index, "expected %s; got %s", token_id_name[tok_id],
+		  token_id_name[tok->id]);
+    }
+  m_lexer.consume ();
+}
+
+/* Issue a parsing error.  If this is the first error that has occurred on
+   the parser, store it within the parser's m_err_out (the buffer will
+   eventually need to be free by the caller of the parser).
+   Otherwise the error is discarded.
+
+   TODO: maybe provide a callback so that client code can print all errors?  */
+
+void
+parser::error_at (int index, const char *fmt, ...)
+{
+  va_list ap;
+  va_start (ap, fmt);
+  char *formatted = xvasprintf (fmt, ap);
+  va_end (ap);
+
+  char *msg_with_index = xasprintf ("error at index %i: %s",
+				    index, formatted);
+  free (formatted);
+
+  if (0)
+    fprintf (stderr, "%s\n", msg_with_index);
+  if (*m_err_out == NULL)
+    *m_err_out = msg_with_index;
+  else
+    free (msg_with_index);
+}
+
+/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF
+   of the given LENGTH.
+   If successful, return a non-NULL json::value *.
+   if there was a problem, return NULL and write an error
+   message to err_out, which must be freed by the caller.  */
+
+value *
+json::parse_utf8_string (size_t length, const char *utf8_buf,
+			 char **err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+
+  parser p (err_out);
+  if (!p.add_utf8 (length, utf8_buf, err_out))
+    return NULL;
+  value *result = p.parse_value (0);
+  if (!p.seen_error_p ())
+    p.require_eof ();
+  if (p.seen_error_p ())
+    {
+      gcc_assert (*err_out);
+      delete result;
+      return NULL;
+    }
+  return result;
+}
+
+/* Attempt to parse the nil-terminated UTF-8 encoded buffer at
+   UTF8_BUF.
+   If successful, return a non-NULL json::value *.
+   if there was a problem, return NULL and write an error
+   message to err_out, which must be freed by the caller.  */
+
+value *
+json::parse_utf8_string (const char *utf8, char **err_out)
+{
+  return parse_utf8_string (strlen (utf8), utf8, err_out);
+}
+
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests.  */
+
+/* Verify that JV->to_str () equals EXPECTED_JSON.  */
+
+static void
+assert_to_str_eq (const char *expected_json, json::value *jv)
+{
+  char *json = jv->to_str ();
+  ASSERT_STREQ (expected_json, json);
+  free (json);
+}
+
+/* FIXME.  */
+
+static void
+test_parse_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("\"foo\"", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)jv)->get_string ());
+  assert_to_str_eq ("\"foo\"", jv);
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_STRING, clone->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)clone)->get_string ());
+  assert_to_str_eq ("\"foo\"", clone);
+  delete clone;
+  delete jv;
+
+  const char *contains_quotes = "\"before \\\"quoted\\\" after\"";
+  jv = parse_utf8_string (contains_quotes, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("before \"quoted\" after", ((json::string *)jv)->get_string ());
+  assert_to_str_eq (contains_quotes, jv);
+  delete jv;
+
+  /* Test of non-ASCII input.  This string is the Japanese word "mojibake",
+     written as C octal-escaped UTF-8.  */
+  const char *mojibake = (/* Opening quote.  */
+			  "\""
+			  /* U+6587 CJK UNIFIED IDEOGRAPH-6587
+			     UTF-8: 0xE6 0x96 0x87
+			     C octal escaped UTF-8: \346\226\207.  */
+			  "\346\226\207"
+			  /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
+			     UTF-8: 0xE5 0xAD 0x97
+			     C octal escaped UTF-8: \345\255\227.  */
+			  "\345\255\227"
+			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
+			      UTF-8: 0xE5 0x8C 0x96
+			      C octal escaped UTF-8: \345\214\226.  */
+			  "\345\214\226"
+			 /* U+3051 HIRAGANA LETTER KE
+			      UTF-8: 0xE3 0x81 0x91
+			      C octal escaped UTF-8: \343\201\221.  */
+			  "\343\201\221"
+			  /* Closing quote.  */
+			  "\"");
+  jv = parse_utf8_string (mojibake, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+		((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+
+  /* Test of \u-escaped unicode.  This is "mojibake" again, as above.  */
+  const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\"";
+  jv = parse_utf8_string (escaped_unicode, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+		((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_number ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("42", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (42.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("42", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NUMBER, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  /* Negative number.  */
+  jv = parse_utf8_string ("-17", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (-17.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("-17", jv);
+  delete jv;
+
+  /* Decimal.  */
+  jv = parse_utf8_string ("3.141", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  /* Exponents.  */
+  jv = parse_utf8_string ("3.141e+0", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  jv = parse_utf8_string ("42e2", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4200, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4200", jv);
+  delete jv;
+
+  jv = parse_utf8_string ("42e-1", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4.2, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4.2", jv);
+  delete jv;
+
+}
+
+/* FIXME.  */
+
+static void
+test_parse_array ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_ARRAY, jv->get_kind ());
+  json::array *arr = static_cast <json::array *> (jv);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", jv);
+
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_ARRAY, clone->get_kind ());
+  arr = static_cast <json::array *> (clone);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", clone);
+  delete clone;
+
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_object ()
+{
+  char *err = NULL;
+  json::value *jv
+    = parse_utf8_string ("{\"foo\": \"bar\", \"baz\": [42, null]}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  json::object *jo = static_cast <json::object *> (jv);
+
+  json::value *foo_value = jo->get ("foo");
+  ASSERT_TRUE (foo_value != NULL);
+  ASSERT_EQ (JSON_STRING, foo_value->get_kind ());
+  ASSERT_STREQ ("bar", ((json::string *)foo_value)->get_string ());
+
+  json::value *baz_value = jo->get ("baz");
+  ASSERT_TRUE (baz_value != NULL);
+  ASSERT_EQ (JSON_ARRAY, baz_value->get_kind ());
+
+  json::array *baz_array = (json::array *)baz_value;
+  ASSERT_EQ (2, baz_array->get_length ());
+  ASSERT_EQ (42, baz_array->get (0)->as_number ()->get ());
+  ASSERT_EQ (JSON_NULL, baz_array->get (1)->get_kind ());
+
+  // TODO: error-handling
+  // TODO: partial document
+
+  /* We can't use assert_to_str_eq since ordering is not guaranteed.  */
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_OBJECT, clone->get_kind ());
+  ASSERT_EQ (JSON_STRING, clone->as_object ()->get ("foo")->get_kind ());
+  delete clone;
+
+  delete jv;
+}
+
+/* Verify that the literals "true", "false" and "null" are parsed,
+   dumped, and are clonable.  */
+
+static void
+test_parse_literals ()
+{
+  json::value *jv, *clone;
+  char *err = NULL;
+  jv = parse_utf8_string ("true", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_TRUE, jv->get_kind ());
+  assert_to_str_eq ("true", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_TRUE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("false", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_FALSE, jv->get_kind ());
+  assert_to_str_eq ("false", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_FALSE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("null", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_NULL, jv->get_kind ());
+  assert_to_str_eq ("null", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NULL, clone->get_kind ());
+  delete clone;
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_jsonrpc ()
+{
+  char *err = NULL;
+  const char *request
+    = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\","
+       " \"params\": [42, 23], \"id\": 1}");
+  json::value *jv = parse_utf8_string (request, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_empty_object ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("{}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  assert_to_str_eq ("{}", jv);
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_error_empty_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("", &err);
+  ASSERT_STREQ ("error at index 0: unexpected token: EOF", err);
+  ASSERT_TRUE (jv == NULL);
+  free (err);
+}
+
+/* FIXME.  */
+
+static void
+test_error_missing_comma ()
+{
+  char *err = NULL;
+  /*                  01234567.  */
+  const char *json = "[0, 1 2]";
+  json::value *jv = parse_utf8_string (json, &err);
+  ASSERT_STREQ ("error at index 6: expected ']'; got number",
+		err);
+  // FIXME: unittest the lexer?
+  ASSERT_TRUE (jv == NULL);
+  free (err);
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+json_cc_tests ()
+{
+  test_parse_string ();
+  test_parse_number ();
+  test_parse_array ();
+  test_parse_object ();
+  test_parse_literals ();
+  test_parse_jsonrpc ();
+  test_parse_empty_object ();
+  test_error_empty_string ();
+  test_error_missing_comma ();
+
+  /* FIXME: tests for roundtripping (noting that we don't preserve
+     object key ordering).  */
+
+  /* FIXME: cloning.  */
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/json.h b/gcc/json.h
new file mode 100644
index 0000000..aedf84a
--- /dev/null
+++ b/gcc/json.h
@@ -0,0 +1,214 @@
+/* JSON parsing
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_JSON_H
+#define GCC_JSON_H
+
+/* Implementation of JSON, a lightweight data-interchange format.
+
+   See http://www.json.org/
+   and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+   and https://tools.ietf.org/html/rfc7159
+
+   Supports parsing text into a DOM-like tree of json::value *, dumping
+   json::value * to text.  */
+
+namespace json
+{
+
+/* Forward decls of json::value and its subclasses (using indentation
+   to denote inheritance.  */
+
+class value;
+  class object;
+  class array;
+  class number;
+  class string;
+  class literal;
+
+/* An enum for discriminating the subclasses of json::value.  */
+
+enum kind
+{
+  /* class json::object.  */
+  JSON_OBJECT,
+
+  /* class json::array.  */
+  JSON_ARRAY,
+
+  /* class json::number.  */
+  JSON_NUMBER,
+
+  /* class json::string.  */
+  JSON_STRING,
+
+  /* class json::literal uses these three values to identify the
+     particular literal.  */
+  JSON_TRUE,
+  JSON_FALSE,
+  JSON_NULL
+};
+
+/* Base class of JSON value.  */
+
+class value
+{
+ public:
+  virtual ~value () {}
+  virtual enum kind get_kind () const = 0;
+  virtual void print (pretty_printer *pp) const = 0;
+
+  /* Create a deep copy of the value, returning a value which must be
+     deleted by the caller.  */
+  virtual value *clone () const = 0;
+
+  char *to_str () const;
+  void dump (FILE *) const;
+
+  /* Methods for dynamically casting a value to one of the subclasses,
+     returning NULL if the value is of the wrong kind.  */
+  const object *as_object () const;
+  const array *as_array () const;
+  const number *as_number () const;
+  const string *as_string () const;
+
+  /* Convenience accessors for attempting to perform key/value lookups
+     on this value as if it were an json::object.
+
+     On success, return true and write the value to OUT_VALUE.
+     On failure, return false and write an error message to OUT_ERR
+     (which must be freed by the caller).  */
+  bool get_value_by_key (const char *name, const value *&out_value,
+			 char *&out_err) const;
+  bool get_int_by_key (const char *name, int &out_value, char *&out_err) const;
+  bool get_string_by_key (const char *name, const char *&out_value,
+			  char *&out_err) const;
+  bool get_array_by_key (const char *name, const array *&out_value,
+			 char *&out_err) const;
+
+  /* As above, but the key is optional.  THIS must still be an object,
+     though.  */
+  bool get_optional_value_by_key (const char *name, const value *&out_value,
+				  char *&out_err) const;
+  bool get_optional_string_by_key (const char *name, const char *&out_value,
+				   char *&out_err) const;
+};
+
+/* Subclass of value for objects: an unordered collection of
+   key/value pairs.  */
+
+class object : public value
+{
+ public:
+  ~object ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_OBJECT; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  value *get (const char *key) const;
+  value *get_if_nonnull (const char *key) const;
+
+  void set (const char *key, value *v);
+
+ private:
+  typedef hash_map <char *, value *,
+    simple_hashmap_traits<nofree_string_hash, value *> > map_t;
+  map_t m_map;
+};
+
+/* Subclass of value for arrays.  */
+
+class array : public value
+{
+ public:
+  ~array ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_ARRAY; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  unsigned get_length () const { return m_elements.length (); }
+  value *get (int idx) const { return m_elements[idx]; }
+  void append (value *v) { m_elements.safe_push (v); }
+
+ private:
+  auto_vec<value *> m_elements;
+};
+
+/* Subclass of value for numbers.  */
+
+class number : public value
+{
+ public:
+  number (double value) : m_value (value) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_NUMBER; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  double get () const { return m_value; }
+
+ private:
+  double m_value;
+};
+
+/* Subclass of value for strings.  */
+
+class string : public value
+{
+ public:
+  string (const char *utf8) : m_utf8 (xstrdup (utf8)) {}
+  ~string () { free (m_utf8); }
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_STRING; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  const char *get_string () const { return m_utf8; }
+
+ private:
+  char *m_utf8;
+};
+
+/* Subclass of value for the three JSON literals "true", "false",
+   and "null".  */
+
+class literal : public value
+{
+ public:
+  literal (enum kind kind) : m_kind (kind) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return m_kind; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+ private:
+  enum kind m_kind;
+};
+
+/* Declarations for parsing JSON to a json::value * tree.  */
+
+extern value *parse_utf8_string (size_t length, const char *utf8_buf,
+				 char **err_out);
+extern value *parse_utf8_string (const char *utf8, char **err_out);
+
+} // namespace json
+
+#endif  /* GCC_JSON_H  */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index fe221ff..36879cf 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -70,6 +70,7 @@ selftest::run_tests ()
   fibonacci_heap_c_tests ();
   typed_splay_tree_c_tests ();
   unique_ptr_tests_cc_tests ();
+  json_cc_tests ();
 
   /* Mid-level data structures.  */
   input_c_tests ();
diff --git a/gcc/selftest.h b/gcc/selftest.h
index e3117c6..2a912d8 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -199,6 +199,7 @@ extern void ggc_tests_c_tests ();
 extern void hash_map_tests_c_tests ();
 extern void hash_set_tests_c_tests ();
 extern void input_c_tests ();
+extern void json_cc_tests ();
 extern void pretty_print_c_tests ();
 extern void read_rtl_function_c_tests ();
 extern void rtl_tests_c_tests ();
-- 
1.8.5.3
Follow-Ups:
- Re: [PATCH 02/10] Add JSON implementation
  - From: Eric Gallager
References:
- [PATCH 00/10] RFC: Prototype of compiler-assisted performance analysis
  - From: David Malcolm
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]