This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH 11/17] Add JSON implementation

From: David Malcolm <dmalcolm at redhat dot com>
To: gcc-patches at gcc dot gnu dot org
Cc: David Malcolm <dmalcolm at redhat dot com>
Date: Mon, 24 Jul 2017 16:05:08 -0400
Subject: [PATCH 11/17] Add JSON implementation
Authentication-results: sourceware.org; auth=none
Authentication-results: ext-mx07.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-results: ext-mx07.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=dmalcolm at redhat dot com
Dkim-filter: OpenDKIM Filter v2.11.0 mx1.redhat.com D7CE8C0B7894
Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com D7CE8C0B7894
References: <1500926714-56988-1-git-send-email-dmalcolm@redhat.com>
This patch is the first part of the Language Server Protocol
server proof-of-concept.

It adds support to gcc for reading and writing JSON,
based on DOM-like trees of "json::value" instances.

gcc/ChangeLog:
	* Makefile.in (OBJS): Add json.o.
	* json.c: New file.
	* json.h: New file.
	* selftest-run-tests.c (selftest::run_tests): Call json_c_tests.
	* selftest.h (selftest::json_c_tests): New decl.
---
 gcc/Makefile.in          |    1 +
 gcc/json.c               | 1914 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/json.h               |  214 ++++++
 gcc/selftest-run-tests.c |    1 +
 gcc/selftest.h           |    1 +
 5 files changed, 2131 insertions(+)
 create mode 100644 gcc/json.c
 create mode 100644 gcc/json.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 519ada0..77ecbd6 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1369,6 +1369,7 @@ OBJS = \
 	ira-color.o \
 	ira-emit.o \
 	ira-lives.o \
+	json.o \
 	jump.o \
 	langhooks.o \
 	lcm.o \
diff --git a/gcc/json.c b/gcc/json.c
new file mode 100644
index 0000000..9f8b456
--- /dev/null
+++ b/gcc/json.c
@@ -0,0 +1,1914 @@
+/* JSON parsing
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "json.h"
+#include "pretty-print.h"
+#include "math.h"
+#include "selftest.h"
+
+using namespace json;
+
+/* class json::value.  */
+
+/* Generate a char * for this json::value tree.
+   The returned value must be freed by the caller.  */
+
+char *
+value::to_str () const
+{
+  pretty_printer pp;
+  print (&pp);
+  return xstrdup (pp_formatted_text (&pp));
+}
+
+/* Dump this json::value tree to OUTF.
+   No formatting is done.  There are no guarantees about the order
+   in which the key/value pairs of json::objects are printed.  */
+
+void
+value::dump (FILE *outf) const
+{
+  pretty_printer pp;
+  pp_buffer (&pp)->stream = outf;
+  print (&pp);
+  pp_flush (&pp);
+}
+
+/* If this json::value is a json::object, return it,
+   otherwise return NULL.  */
+
+const object *
+value::as_object () const
+{
+  if (get_kind () != JSON_OBJECT)
+    return NULL;
+  return static_cast <const object *> (this);
+}
+
+/* If this json::value is a json::array, return it,
+   otherwise return NULL.  */
+
+const array *
+value::as_array () const
+{
+  if (get_kind () != JSON_ARRAY)
+    return NULL;
+  return static_cast <const array *> (this);
+}
+
+/* If this json::value is a json::number, return it,
+   otherwise return NULL.  */
+
+const number *
+value::as_number () const
+{
+  if (get_kind () != JSON_NUMBER)
+    return NULL;
+  return static_cast <const number *> (this);
+}
+
+/* If this json::value is a json::string, return it,
+   otherwise return NULL.  */
+
+const string *
+value::as_string () const
+{
+  if (get_kind () != JSON_STRING)
+    return NULL;
+  return static_cast <const string *> (this);
+}
+
+/* Attempt to get the value of a key/value pair from this value
+   as if THIS value were an object.
+
+   If THIS is not a json::object, return write an error message to OUT_ERR
+   (which must be freed by the caller) and return false.
+
+   Otherwise write the value ptr (possibly NULL) to OUT_VALUE and
+   return true.  */
+
+bool
+value::get_optional_value_by_key (const char *name, const value *&out_value,
+				  char *&out_err) const
+{
+  const json::object *obj = as_object ();
+  if (!obj)
+    {
+      out_err = xstrdup ("not an object");
+      return false;
+    }
+  out_value = obj->get (name);
+  return true;
+}
+
+/* Attempt to get a string value of a key/value pair from this value
+   as if THIS value were an object.
+
+   If THIS is a json::object, and KEY is either not present, is a string,
+   or is the "null" JSON literal, then return true, and write to OUT_VALUE.
+   If a string, then the ptr is written to OUT_VALUE, otherwise NULL
+   is written to OUT_VALUE.
+
+   If THIS is not a json::object, or KEY is not a string/"null",
+   return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+value::get_optional_string_by_key (const char *name, const char *&out_value,
+				   char *&out_err) const
+{
+  const json::value *v;
+  if (!get_optional_value_by_key (name, v, out_err))
+    return false;
+  if (v && v->get_kind () != JSON_NULL)
+    {
+      const json::string *s = v->as_string ();
+      if (!s)
+	{
+	  out_err = xasprintf ("not a string: \"%s\"", name);
+	  return false;
+	}
+      out_value = s->get_string ();
+      return true;
+    }
+  else
+    {
+      out_value = NULL;
+      return true;
+    }
+}
+
+/* Attempt to get lookup the value of a key/value pair from this value
+   as if this value were an object.
+
+   To succeed, THIS must be a json::object, and it must have a key named
+   NAME.
+
+   On success, return true and write the value to OUT_VALUE.
+   On failure, return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+value::get_value_by_key (const char *name, const value *&out_value,
+			 char *&out_err) const
+{
+  const json::object *obj = as_object ();
+  if (!obj)
+    {
+      out_err = xstrdup ("not an object");
+      return false;
+    }
+  const json::value *v = obj->get (name);
+  if (!v)
+    {
+      out_err = xasprintf ("missing attribute: \"%s\"", name);
+      return false;
+    }
+  out_value = v;
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a number;
+   if successful, write it as an int to OUT_VALUE.  */
+
+bool
+value::get_int_by_key (const char *name, int &out_value, char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::number *n = v->as_number ();
+  if (!n)
+    {
+      out_err = xasprintf ("not a number: \"%s\"", name);
+      return false;
+    }
+  out_value = n->get ();
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a string;
+   if successful, write it as const char * to OUT_VALUE.  */
+
+bool
+value::get_string_by_key (const char *name, const char *&out_value,
+			  char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::string *s = v->as_string ();
+  if (!s)
+    {
+      out_err = xasprintf ("not a string: \"%s\"", name);
+      return false;
+    }
+  out_value = s->get_string ();
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be an array;
+   if successful, write it as a json::array * to OUT_VALUE.  */
+
+bool
+value::get_array_by_key (const char *name, const array *&out_value,
+			 char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::array *arr = v->as_array ();
+  if (!arr)
+    {
+      out_err = xasprintf ("not an array: \"%s\"", name);
+      return false;
+    }
+  out_value = arr;
+  return true;
+}
+
+/* class json::object, a subclass of json::value, representing
+   an unordered collection of key/value pairs.  */
+
+/* json:object's dtor.  */
+
+object::~object ()
+{
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      free (const_cast <char *>((*it).first));
+      delete ((*it).second);
+    }
+}
+
+/* Implementation of json::value::print for json::object.  */
+
+void
+object::print (pretty_printer *pp) const
+{
+  /* Note that the order is not guaranteed.  */
+  pp_character (pp, '{');
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      if (it != m_map.begin ())
+	pp_string (pp, ", ");
+      const char *key = const_cast <char *>((*it).first);
+      value *value = (*it).second;
+      pp_printf (pp, "\"%s\": ", key); // FIXME: escaping?
+      value->print (pp);
+    }
+  pp_character (pp, '}');
+}
+
+/* Implementation of json::value::clone for json::object.  */
+
+value *
+object::clone () const
+{
+  object *other = new object ();
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      const char *key = const_cast <char *>((*it).first);
+      value *value = (*it).second;
+      other->set (key, value->clone ());
+    }
+  return other;
+}
+
+/* Get the json::value * for KEY, or NULL if the key is not present.  */
+
+value *
+object::get (const char *key) const
+{
+  value **slot = const_cast <object*> (this)->m_map.get (key);
+  if (slot)
+    return *slot;
+  return NULL;
+}
+
+/* As object::get (KEY), but return NULL if the value of the key
+   is the "null" JSON literal.  */
+
+value *
+object::get_if_nonnull (const char *key) const
+{
+  value *result = get (key);
+  if (!result)
+    return NULL;
+  if (result->get_kind () == JSON_NULL)
+    return NULL;
+  return result;
+}
+
+/* Set the json::value * for KEY, taking ownership of VALUE
+   (and taking a copy of KEY if necessary).  */
+
+void
+object::set (const char *key, value *v)
+{
+  value **ptr = m_map.get (key);
+  if (ptr)
+    {
+      /* If the key is already present, delete the existing value
+	 and overwrite it.  */
+      delete *ptr;
+      *ptr = v;
+    }
+  else
+    /* If the key wasn't already present, take a copy of the key,
+       and store the value.  */
+    m_map.put (xstrdup (key), v);
+}
+
+/* class json::array, a subclass of json::value, representing
+   an ordered collection of values.  */
+
+/* json::array's dtor.  */
+
+array::~array ()
+{
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    delete v;
+}
+
+/* Implementation of json::value::print for json::array.  */
+
+void
+array::print (pretty_printer *pp) const
+{
+  pp_character (pp, '[');
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    {
+      if (i)
+	pp_string (pp, ", ");
+      v->print (pp);
+    }
+  pp_character (pp, ']');
+}
+
+/* Implementation of json::value::clone for json::array.  */
+
+value *
+array::clone () const
+{
+  array *other = new array ();
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    other->append (v->clone ());
+  return other;
+}
+
+/* class json::number, a subclass of json::value, wrapping a double.  */
+
+/* Implementation of json::value::print for json::number.  */
+
+void
+number::print (pretty_printer *pp) const
+{
+  char tmp[1024];
+  snprintf (tmp, sizeof (tmp), "%g", m_value);
+  pp_string (pp, tmp);
+}
+
+/* Implementation of json::value::clone for json::number.  */
+
+value *
+number::clone () const
+{
+  return new number (m_value);
+}
+
+/* class json::string, a subclass of json::value.  */
+
+void
+string::print (pretty_printer *pp) const
+{
+  pp_character (pp, '"');
+  for (const char *ptr = m_utf8; *ptr; ptr++)
+    {
+      char ch = *ptr;
+      switch (ch)
+	{
+	case '"':
+	  pp_string (pp, "\\\"");
+	  break;
+	case '\\':
+	  pp_string (pp, "\\n");
+	  break;
+	case '\b':
+	  pp_string (pp, "\\b");
+	  break;
+	case '\f':
+	  pp_string (pp, "\\f");
+	  break;
+	case '\n':
+	  pp_string (pp, "\\n");
+	  break;
+	case '\r':
+	  pp_string (pp, "\\r");
+	  break;
+	case '\t':
+	  pp_string (pp, "\\t");
+	  break;
+
+	default:
+	  pp_character (pp, ch);
+	}
+    }
+  pp_character (pp, '"');
+}
+
+/* Implementation of json::value::clone for json::string.  */
+
+value *
+string::clone () const
+{
+  return new string (m_utf8);
+}
+
+/* class json::literal, a subclass of json::value.  */
+
+/* Implementation of json::value::print for json::literal.  */
+
+void
+literal::print (pretty_printer *pp) const
+{
+  switch (m_kind)
+    {
+    case JSON_TRUE:
+      pp_string (pp, "true");
+      break;
+    case JSON_FALSE:
+      pp_string (pp, "false");
+      break;
+    case JSON_NULL:
+      pp_string (pp, "null");
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Implementation of json::value::clone for json::literal.  */
+
+value *
+literal::clone () const
+{
+  return new literal (m_kind);
+}
+
+
+/* Declarations relating to parsing JSON, all within an
+   anonymous namespace.  */
+
+namespace {
+
+/* A typedef representing a single unicode character.  */
+
+typedef unsigned unichar;
+
+/* An enum for discriminating different kinds of JSON token.  */
+
+enum token_id
+{
+  TOK_ERROR,
+
+  TOK_EOF,
+
+  /* Punctuation.  */
+  TOK_OPEN_SQUARE,
+  TOK_OPEN_CURLY,
+  TOK_CLOSE_SQUARE,
+  TOK_CLOSE_CURLY,
+  TOK_COLON,
+  TOK_COMMA,
+
+  /* Literal names.  */
+  TOK_TRUE,
+  TOK_FALSE,
+  TOK_NULL,
+
+  TOK_STRING,
+  TOK_NUMBER
+};
+
+/* Human-readable descriptions of enum token_id.  */
+
+static const char *token_id_name[] = {
+  "error",
+  "EOF",
+  "'['",
+  "'{'",
+  "']'",
+  "'}'",
+  "':'",
+  "','",
+  "'true'",
+  "'false'",
+  "'null'",
+  "string",
+  "number"
+};
+
+/* Tokens within the JSON lexer.  */
+
+struct token
+{
+  /* The kind of token.  */
+  enum token_id id;
+
+  /* The location of this token within the unicode
+     character stream.  */
+  int index;
+
+  union
+  {
+    /* Value for TOK_ERROR and TOK_STRING.  */
+    char *string;
+
+    /* Value for TOK_NUMBER.  */
+    double number;
+  } u;
+};
+
+/* A class for lexing JSON.  */
+
+class lexer
+{
+ public:
+  lexer ();
+  ~lexer ();
+  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
+
+  const token *peek ();
+  void consume ();
+
+ private:
+  bool get_char (unichar &out);
+  void unget_char ();
+  static void dump_token (FILE *outf, const token *tok);
+  void lex_token (token *out);
+  void lex_string (token *out);
+  void lex_number (token *out, unichar first_char);
+  bool rest_of_literal (const char *suffix);
+
+ private:
+  auto_vec<unichar> m_buffer;
+  int m_next_char_idx;
+
+  static const int MAX_TOKENS = 1;
+  token m_next_tokens[MAX_TOKENS];
+  int m_num_next_tokens;
+};
+
+/* A class for parsing JSON.  */
+
+class parser
+{
+ public:
+  parser (char **err_out);
+  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
+  value *parse_value (int depth);
+  object *parse_object (int depth);
+  array *parse_array (int depth);
+
+  bool seen_error_p () const { return *m_err_out; }
+  void require_eof ();
+
+ private:
+  void require (enum token_id tok_id);
+  void error_at (int, const char *, ...) ATTRIBUTE_PRINTF_3;
+
+ private:
+  lexer m_lexer;
+  char **m_err_out;
+};
+
+} // anonymous namespace for parsing implementation
+
+/* Parser implementation.  */
+
+/* lexer's ctor.  */
+
+lexer::lexer ()
+: m_buffer (), m_next_char_idx (0), m_num_next_tokens (0)
+{
+}
+
+/* lexer's dtor.  */
+
+lexer::~lexer ()
+{
+  while (m_num_next_tokens > 0)
+    consume ();
+}
+
+/* Peek the next token.  */
+
+const token *
+lexer::peek ()
+{
+  if (m_num_next_tokens == 0)
+    {
+      lex_token (&m_next_tokens[0]);
+      m_num_next_tokens++;
+    }
+  return &m_next_tokens[0];
+}
+
+/* Consume the next token.  */
+
+void
+lexer::consume ()
+{
+  if (m_num_next_tokens == 0)
+    peek ();
+
+  gcc_assert (m_num_next_tokens > 0);
+  gcc_assert (m_num_next_tokens <= MAX_TOKENS);
+
+  if (0)
+    {
+      fprintf (stderr, "consuming token: ");
+      dump_token (stderr, &m_next_tokens[0]);
+      fprintf (stderr, "\n");
+    }
+
+  if (m_next_tokens[0].id == TOK_ERROR
+      || m_next_tokens[0].id == TOK_STRING)
+    free (m_next_tokens[0].u.string);
+
+  m_num_next_tokens--;
+  memmove (&m_next_tokens[0], &m_next_tokens[1],
+	   sizeof (token) * m_num_next_tokens);
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
+   buffer.  */
+
+bool
+lexer::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  /* FIXME: adapted from charset.c:one_utf8_to_cppchar.  */
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+  const uchar *inbuf = (const unsigned char *) (utf8_buf);
+  const uchar **inbufp = &inbuf;
+  size_t *inbytesleftp = &length;
+
+  while (length > 0)
+    {
+      unichar c;
+      const uchar *inbuf = *inbufp;
+      size_t nbytes, i;
+
+      c = *inbuf;
+      if (c < 0x80)
+	{
+	  m_buffer.safe_push (c);
+	  *inbytesleftp -= 1;
+	  *inbufp += 1;
+	  continue;
+	}
+
+      /* The number of leading 1-bits in the first byte indicates how many
+	 bytes follow.  */
+      for (nbytes = 2; nbytes < 7; nbytes++)
+	if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
+	  goto found;
+      *err_out = xstrdup ("ill-formed UTF-8 sequence");
+      return false;
+    found:
+
+      if (*inbytesleftp < nbytes)
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8 sequence");
+	  return false;
+	}
+
+      c = (c & masks[nbytes-1]);
+      inbuf++;
+      for (i = 1; i < nbytes; i++)
+	{
+	  unichar n = *inbuf++;
+	  if ((n & 0xC0) != 0x80)
+	    {
+	      *err_out = xstrdup ("ill-formed UTF-8 sequence");
+	      return false;
+	    }
+	  c = ((c << 6) + (n & 0x3F));
+	}
+
+      /* Make sure the shortest possible encoding was used.  */
+      if ((   c <=      0x7F && nbytes > 1)
+	  || (c <=     0x7FF && nbytes > 2)
+	  || (c <=    0xFFFF && nbytes > 3)
+	  || (c <=  0x1FFFFF && nbytes > 4)
+	  || (c <= 0x3FFFFFF && nbytes > 5))
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8:"
+			      " shortest possible encoding not used");
+	  return false;
+	}
+
+      /* Make sure the character is valid.  */
+      if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8: invalid character");
+	  return false;
+	}
+
+      m_buffer.safe_push (c);
+      *inbufp = inbuf;
+      *inbytesleftp -= nbytes;
+    }
+  return true;
+}
+
+/* Attempt to get the next unicode character from this lexer's buffer.
+   If successful, write it to OUT and return true.
+   Otherwise, return false.  */
+
+bool
+lexer::get_char (unichar &out)
+{
+  if (m_next_char_idx >= (int)m_buffer.length ())
+    return false;
+
+  out = m_buffer[m_next_char_idx++];
+  return true;
+}
+
+/* FIXME.  */
+
+void
+lexer::unget_char ()
+{
+  --m_next_char_idx;
+}
+
+/* Print a textual representation of TOK to OUTF.
+   This is intended for debugging the lexer and parser,
+   rather than for user-facing output.  */
+
+void
+lexer::dump_token (FILE *outf, const token *tok)
+{
+  switch (tok->id)
+    {
+    case TOK_ERROR:
+      fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
+      break;
+
+    case TOK_EOF:
+      fprintf (outf, "TOK_EOF");
+      break;
+
+    case TOK_OPEN_SQUARE:
+      fprintf (outf, "TOK_OPEN_SQUARE");
+      break;
+
+    case TOK_OPEN_CURLY:
+      fprintf (outf, "TOK_OPEN_CURLY");
+      break;
+
+    case TOK_CLOSE_SQUARE:
+      fprintf (outf, "TOK_CLOSE_SQUARE");
+      break;
+
+    case TOK_CLOSE_CURLY:
+      fprintf (outf, "TOK_CLOSE_CURLY");
+      break;
+
+    case TOK_COLON:
+      fprintf (outf, "TOK_COLON");
+      break;
+
+    case TOK_COMMA:
+      fprintf (outf, "TOK_COMMA");
+      break;
+
+    case TOK_TRUE:
+      fprintf (outf, "TOK_TRUE");
+      break;
+
+    case TOK_FALSE:
+      fprintf (outf, "TOK_FALSE");
+      break;
+
+    case TOK_NULL:
+      fprintf (outf, "TOK_NULL");
+      break;
+
+    case TOK_STRING:
+      fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
+      break;
+
+    case TOK_NUMBER:
+      fprintf (outf, "TOK_NUMBER (%f)", tok->u.number);
+      break;
+
+    default:
+      gcc_unreachable ();
+      break;
+    }
+}
+
+/* Attempt to lex the input buffer, writing the next token to OUT.
+   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.  */
+
+void
+lexer::lex_token (token *out)
+{
+  /* Skip to next non-whitespace char.  */
+  unichar next_char;
+  while (1)
+    {
+      out->index = m_next_char_idx;
+      if (!get_char (next_char))
+	{
+	  out->id = TOK_EOF;
+	  return;
+	}
+      if (next_char != ' '
+	  && next_char != '\t'
+	  && next_char != '\n'
+	  && next_char != '\r')
+	break;
+    }
+
+  switch (next_char)
+    {
+    case '[':
+      out->id = TOK_OPEN_SQUARE;
+      break;
+
+    case '{':
+      out->id = TOK_OPEN_CURLY;
+      break;
+
+    case ']':
+      out->id = TOK_CLOSE_SQUARE;
+      break;
+
+    case '}':
+      out->id = TOK_CLOSE_CURLY;
+      break;
+
+    case ':':
+      out->id = TOK_COLON;
+      break;
+
+    case ',':
+      out->id = TOK_COMMA;
+      break;
+
+    case '"':
+      lex_string (out);
+      break;
+
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      lex_number (out, next_char);
+      break;
+
+    case 't':
+      /* Handle literal "true".  */
+      if (rest_of_literal ("rue"))
+	{
+	  out->id = TOK_TRUE;
+	  break;
+	}
+      else
+	goto err;
+
+    case 'f':
+      /* Handle literal "false".  */
+      if (rest_of_literal ("alse"))
+	{
+	  out->id = TOK_FALSE;
+	  break;
+	}
+      else
+	goto err;
+
+    case 'n':
+      /* Handle literal "null".  */
+      if (rest_of_literal ("ull"))
+	{
+	  out->id = TOK_NULL;
+	  break;
+	}
+      else
+	goto err;
+
+    err:
+    default:
+      out->id = TOK_ERROR;
+      out->u.string = xasprintf ("unexpected character: %c", next_char);
+      break;
+    }
+}
+
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+   to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR)
+   if an error occurred.
+   (ECMA-404 section 9; RFC 7159 section 7).  */
+
+void
+lexer::lex_string (token *out)
+{
+  auto_vec<unichar> content;
+  bool still_going = true;
+  while (still_going)
+    {
+      unichar uc;
+      if (!get_char (uc))
+	{
+	  out->id = TOK_ERROR;
+	  out->u.string = xstrdup ("EOF within string");
+	  return;
+	}
+      switch (uc)
+	{
+	case '"':
+	  still_going = false;
+	  break;
+	case '\\':
+	  {
+	    unichar next_char;
+	    if (!get_char (next_char))
+	      {
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("EOF within string");;
+		return;
+	      }
+	    switch (next_char)
+	      {
+	      case '"':
+	      case '\\':
+	      case '/':
+		content.safe_push (next_char);
+		break;
+
+	      case 'b':
+		content.safe_push ('\b');
+		break;
+
+	      case 'f':
+		content.safe_push ('\f');
+		break;
+
+	      case 'n':
+		content.safe_push ('\n');
+		break;
+
+	      case 'r':
+		content.safe_push ('\r');
+		break;
+
+	      case 't':
+		content.safe_push ('\t');
+		break;
+
+	      case 'u':
+		{
+		  unichar result = 0;
+		  for (int i = 0; i < 4; i++)
+		    {
+		      unichar hexdigit;
+		      if (!get_char (hexdigit))
+			{
+			  out->id = TOK_ERROR;
+			  out->u.string = xstrdup ("EOF within string");
+			  return;
+			}
+		      result <<= 4;
+		      if (hexdigit >= '0' && hexdigit <= '9')
+			result += hexdigit - '0';
+		      else if (hexdigit >= 'a' && hexdigit <= 'f')
+			result += (hexdigit - 'a') + 10;
+		      else if (hexdigit >= 'A' && hexdigit <= 'F')
+			result += (hexdigit - 'A') + 10;
+		      else
+			{
+			  out->id = TOK_ERROR;
+			  out->u.string = xstrdup ("bogus hex char");
+			  return;
+			}
+		    }
+		  content.safe_push (result);
+		}
+		break;
+
+	      default:
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("unrecognized escape char");
+		return;
+	      }
+	  }
+	  break;
+
+	default:
+	  /* Reject unescaped control characters U+0000 through U+001F
+	     (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1).  */
+	  if (uc <= 0x1f)
+	    {
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("unescaped control char");
+		return;
+	    }
+
+	  /* Otherwise, add regular unicode code point.  */
+	  content.safe_push (uc);
+	  break;
+	}
+    }
+
+  out->id = TOK_STRING;
+
+  auto_vec<char> utf8_buf;
+  // FIXME: adapted from libcpp/charset.c:one_cppchar_to_utf8
+  for (unsigned i = 0; i < content.length (); i++)
+    {
+      static const uchar masks[6] =  { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+      static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
+      size_t nbytes;
+      uchar buf[6], *p = &buf[6];
+      unichar c = content[i];
+
+      nbytes = 1;
+      if (c < 0x80)
+	*--p = c;
+      else
+	{
+	  do
+	    {
+	      *--p = ((c & 0x3F) | 0x80);
+	      c >>= 6;
+	      nbytes++;
+	    }
+	  while (c >= 0x3F || (c & limits[nbytes-1]));
+	  *--p = (c | masks[nbytes-1]);
+	}
+
+      while (p < &buf[6])
+	utf8_buf.safe_push (*p++);
+    }
+
+  out->u.string = XNEWVEC (char, utf8_buf.length () + 1);
+  for (unsigned i = 0; i < utf8_buf.length (); i++)
+    out->u.string[i] = utf8_buf[i];
+  out->u.string[utf8_buf.length ()] = '\0';
+
+  // FIXME: leaks?  have a json_context do the allocation
+}
+
+/* Having consumed FIRST_CHAR, an initial digit or '-' character from
+   the lexer's buffer attempt to lex the rest of a JSON number, writing
+   the result to OUT (or TOK_ERROR) if an error occurred.
+   (ECMA-404 section 8; RFC 7159 section 6).  */
+
+void
+lexer::lex_number (token *out, unichar first_char)
+{
+  bool negate = false;
+  double value = 0.0;
+  if (first_char == '-')
+    {
+      negate = true;
+      if (!get_char (first_char))
+	{
+	  out->id = TOK_ERROR;
+	  out->u.string = xstrdup ("expected digit");
+	  return;
+	}
+    }
+
+  if (first_char == '0')
+    value = 0.0;
+  else if (!ISDIGIT (first_char))
+    {
+      out->id = TOK_ERROR;
+      out->u.string = xstrdup ("expected digit");
+      return;
+    }
+  else
+    {
+      /* Got a nonzero digit; expect zero or more digits.  */
+      value = first_char - '0';
+      while (1)
+	{
+	  unichar uc;
+	  if (!get_char (uc))
+	    break;
+	  if (ISDIGIT (uc))
+	    {
+	      value *= 10;
+	      value += uc -'0';
+	      continue;
+	    }
+	  else
+	    {
+	      unget_char ();
+	      break;
+	    }
+	}
+    }
+
+  /* Optional '.', followed by one or more decimals.  */
+  unichar next_char;
+  if (get_char (next_char))
+    {
+      if (next_char == '.')
+	{
+	  /* Parse decimal digits.  */
+	  bool had_digit = false;
+	  // FIXME: does this lose too much precision?
+	  double digit_factor = 0.1;
+	  while (get_char (next_char))
+	    {
+	      if (!ISDIGIT (next_char))
+		{
+		  unget_char ();
+		  break;
+		}
+	      value += (next_char - '0') * digit_factor;
+	      digit_factor *= 0.1;
+	      had_digit = true;
+	    }
+	  if (!had_digit)
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("expected digit");
+	      return;
+	    }
+	}
+      else
+	unget_char ();
+    }
+
+  /* Parse 'e' and 'E'.  */
+  unichar exponent_char;
+  if (get_char (exponent_char))
+    {
+      if (exponent_char == 'e' || exponent_char == 'E')
+	{
+	  /* Optional +/-.  */
+	  unichar sign_char;
+	  int exponent = 0;
+	  bool negate_exponent = false;
+	  bool had_exponent_digit = false;
+	  if (!get_char (sign_char))
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("EOF within exponent");
+	      return;
+	    }
+	  if (sign_char == '-')
+	    negate_exponent = true;
+	  else if (sign_char == '+')
+	    ;
+	  else if (ISDIGIT (sign_char))
+	    {
+	      exponent = sign_char - '0';
+	      had_exponent_digit = true;
+	    }
+	  else
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string
+		= xstrdup ("expected '-','+' or digit within exponent");
+	      return;
+	    }
+
+	  /* One or more digits (we might have seen the digit above,
+	     though).  */
+	  while (1)
+	    {
+	      unichar uc;
+	      if (!get_char (uc))
+		break;
+	      if (ISDIGIT (uc))
+		{
+		  exponent *= 10;
+		  exponent += uc -'0';
+		  had_exponent_digit = true;
+		  continue;
+		}
+	      else
+		{
+		  unget_char ();
+		  break;
+		}
+	    }
+	  if (!had_exponent_digit)
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("expected digit within exponent");
+	      return;
+	    }
+	  if (negate_exponent)
+	    exponent = -exponent;
+	  /* FIXME: better way to do this?  */
+	  value = value * pow (10, exponent);
+	}
+      else
+	unget_char ();
+    }
+
+  if (negate)
+    value = -value;
+
+  out->id = TOK_NUMBER;
+  out->u.number = value;
+}
+
+/* Determine if the next characters to be lexed match SUFFIX.
+   SUFFIX must be pure ASCII.
+   If so, consume the characters and return true.
+   Otherwise, return false.  */
+
+bool
+lexer::rest_of_literal (const char *suffix)
+{
+  int suffix_idx = 0;
+  int buf_idx = m_next_char_idx;
+  while (1)
+    {
+      if (suffix[suffix_idx] == '\0')
+	{
+	  m_next_char_idx += suffix_idx;
+	  return true;
+	}
+      if (buf_idx >= (int)m_buffer.length ())
+	return false;
+      /* This assumes that suffix is ASCII.  */
+      if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx])
+	return false;
+      buf_idx++;
+      suffix_idx++;
+    }
+}
+
+/* parser's ctor.  */
+
+parser::parser (char **err_out)
+: m_lexer (), m_err_out (err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+  *err_out = NULL;
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
+   lexer's buffer.  */
+
+bool
+parser::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  return m_lexer.add_utf8 (length, utf8_buf, err_out);
+}
+
+/* Parse a JSON value (object, array, number, string, or literal).
+   (ECMA-404 section 5; RFC 7159 section 3).  */
+
+value *
+parser::parse_value (int depth)
+{
+  const token *tok = m_lexer.peek ();
+
+  /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
+     states: "An implementation may set limits on the maximum depth
+     of nesting.".
+
+     Ideally we'd avoid this limit (e.g. by rewriting parse_value,
+     parse_object, and parse_array into a single function with a vec of
+     state).  */
+  const int MAX_DEPTH = 100;
+  if (depth >= MAX_DEPTH)
+    {
+      error_at (tok->index, "maximum nesting depth exceeded: %i", MAX_DEPTH);
+      return NULL;
+    }
+
+  switch (tok->id)
+    {
+    case TOK_OPEN_CURLY:
+      return parse_object (depth);
+
+    case TOK_STRING:
+      {
+	string *result = new string (tok->u.string);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_OPEN_SQUARE:
+      return parse_array (depth);
+
+    case TOK_NUMBER:
+      {
+	number *result = new number (tok->u.number);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_TRUE:
+      {
+	literal *result = new literal (JSON_TRUE);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_FALSE:
+      {
+	literal *result = new literal (JSON_FALSE);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_NULL:
+      {
+	literal *result = new literal (JSON_NULL);
+	m_lexer.consume ();
+	return result;
+      }
+
+    default:
+      error_at (tok->index, "unexpected token: %s", token_id_name[tok->id]);
+      return NULL;
+    }
+}
+
+/* Parse a JSON object.
+   (ECMA-404 section 6; RFC 7159 section 4).  */
+
+object *
+parser::parse_object (int depth)
+{
+  require (TOK_OPEN_CURLY);
+
+  object *result = new object ();
+
+  const token *tok = m_lexer.peek ();
+  if (tok->id == TOK_CLOSE_CURLY)
+    {
+      require (TOK_CLOSE_CURLY);
+      return result;
+    }
+  if (tok->id != TOK_STRING)
+    {
+      error_at (tok->index, "expected string for object key");
+      return result;
+    }
+  while (!seen_error_p ())
+    {
+      tok = m_lexer.peek ();
+      if (tok->id != TOK_STRING)
+	{
+	  error_at (tok->index, "expected string for object key");
+	  return result;
+	}
+      char *key = xstrdup (tok->u.string);
+      m_lexer.consume ();
+
+      require (TOK_COLON);
+
+      value *v = parse_value (depth + 1);
+      if (!v)
+	{
+	  free (key);
+	  return result;
+	}
+      /* We don't enforce uniqueness for keys.  */
+      result->set (key, v);
+      free (key);
+
+      tok = m_lexer.peek ();
+      if (tok->id == TOK_COMMA)
+	{
+	  m_lexer.consume ();
+	  continue;
+	}
+      else
+	{
+	  require (TOK_CLOSE_CURLY);
+	  break;
+	}
+    }
+  return result;
+}
+
+/* Parse a JSON array.
+   (ECMA-404 section 7; RFC 7159 section 5).  */
+
+array *
+parser::parse_array (int depth)
+{
+  require (TOK_OPEN_SQUARE);
+
+  array *result = new array ();
+
+  const token *tok = m_lexer.peek ();
+  if (tok->id == TOK_CLOSE_SQUARE)
+    {
+      m_lexer.consume ();
+      return result;
+    }
+
+  while (!seen_error_p ())
+    {
+      value *v = parse_value (depth + 1);
+      if (!v)
+	return result;
+
+      result->append (v);
+
+      tok = m_lexer.peek ();
+      if (tok->id == TOK_COMMA)
+	{
+	  m_lexer.consume ();
+	  continue;
+	}
+      else
+	{
+	  require (TOK_CLOSE_SQUARE);
+	  break;
+	}
+    }
+
+  return result;
+}
+
+/* Require an EOF, or fail if there is surplus input.  */
+
+void
+parser::require_eof ()
+{
+  require (TOK_EOF);
+}
+
+/* Consume the next token, issuing an error if it is not of kind TOK_ID.  */
+
+void
+parser::require (enum token_id tok_id)
+{
+  const token *tok = m_lexer.peek ();
+  if (tok->id != tok_id)
+    {
+      if (tok->id == TOK_ERROR)
+	error_at (tok->index, "expected %s; got bad token: %s",
+		  token_id_name[tok_id], tok->u.string);
+      else
+	error_at (tok->index, "expected %s; got %s", token_id_name[tok_id],
+		  token_id_name[tok->id]);
+    }
+  m_lexer.consume ();
+}
+
+/* Issue a parsing error.  If this is the first error that has occurred on
+   the parser, store it within the parser's m_err_out (the buffer will
+   eventually need to be free by the caller of the parser).
+   Otherwise the error is discarded.
+
+   TODO: maybe provide a callback so that client code can print all errors?  */
+
+void
+parser::error_at (int index, const char *fmt, ...)
+{
+  va_list ap;
+  va_start (ap, fmt);
+  char *formatted = xvasprintf (fmt, ap);
+  va_end (ap);
+
+  char *msg_with_index = xasprintf ("error at index %i: %s",
+				    index, formatted);
+  free (formatted);
+
+  if (0)
+    fprintf (stderr, "%s\n", msg_with_index);
+  if (*m_err_out == NULL)
+    *m_err_out = msg_with_index;
+  else
+    free (msg_with_index);
+}
+
+/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF
+   of the given LENGTH.
+   If successful, return a non-NULL json::value *.
+   if there was a problem, return NULL and write an error
+   message to err_out, which must be freed by the caller.  */
+
+value *
+json::parse_utf8_string (size_t length, const char *utf8_buf,
+			 char **err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+
+  parser p (err_out);
+  if (!p.add_utf8 (length, utf8_buf, err_out))
+    return NULL;
+  value *result = p.parse_value (0);
+  if (!p.seen_error_p ())
+    p.require_eof ();
+  if (p.seen_error_p ())
+    {
+      gcc_assert (*err_out);
+      delete result;
+      return NULL;
+    }
+  return result;
+}
+
+/* Attempt to parse the nil-terminated UTF-8 encoded buffer at
+   UTF8_BUF.
+   If successful, return a non-NULL json::value *.
+   if there was a problem, return NULL and write an error
+   message to err_out, which must be freed by the caller.  */
+
+value *
+json::parse_utf8_string (const char *utf8, char **err_out)
+{
+  return parse_utf8_string (strlen (utf8), utf8, err_out);
+}
+
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests.  */
+
+/* Verify that JV->to_str () equals EXPECTED_JSON.  */
+
+static void
+assert_to_str_eq (const char *expected_json, json::value *jv)
+{
+  char *json = jv->to_str ();
+  ASSERT_STREQ (expected_json, json);
+  free (json);
+}
+
+/* FIXME.  */
+
+static void
+test_parse_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("\"foo\"", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)jv)->get_string ());
+  assert_to_str_eq ("\"foo\"", jv);
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_STRING, clone->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)clone)->get_string ());
+  assert_to_str_eq ("\"foo\"", clone);
+  delete clone;
+  delete jv;
+
+  const char *contains_quotes = "\"before \\\"quoted\\\" after\"";
+  jv = parse_utf8_string (contains_quotes, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("before \"quoted\" after", ((json::string *)jv)->get_string ());
+  assert_to_str_eq (contains_quotes, jv);
+  delete jv;
+
+  /* Test of non-ASCII input.  This string is the Japanese word "mojibake",
+     written as C octal-escaped UTF-8.  */
+  const char *mojibake = (/* Opening quote.  */
+			  "\""
+			  /* U+6587 CJK UNIFIED IDEOGRAPH-6587
+			     UTF-8: 0xE6 0x96 0x87
+			     C octal escaped UTF-8: \346\226\207.  */
+			  "\346\226\207"
+			  /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
+			     UTF-8: 0xE5 0xAD 0x97
+			     C octal escaped UTF-8: \345\255\227.  */
+			  "\345\255\227"
+			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
+			      UTF-8: 0xE5 0x8C 0x96
+			      C octal escaped UTF-8: \345\214\226.  */
+			  "\345\214\226"
+			 /* U+3051 HIRAGANA LETTER KE
+			      UTF-8: 0xE3 0x81 0x91
+			      C octal escaped UTF-8: \343\201\221.  */
+			  "\343\201\221"
+			  /* Closing quote.  */
+			  "\"");
+  jv = parse_utf8_string (mojibake, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+		((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+
+  /* Test of \u-escaped unicode.  This is "mojibake" again, as above.  */
+  const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\"";
+  jv = parse_utf8_string (escaped_unicode, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+		((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_number ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("42", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (42.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("42", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NUMBER, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  /* Negative number.  */
+  jv = parse_utf8_string ("-17", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (-17.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("-17", jv);
+  delete jv;
+
+  /* Decimal.  */
+  jv = parse_utf8_string ("3.141", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  /* Exponents.  */
+  jv = parse_utf8_string ("3.141e+0", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  jv = parse_utf8_string ("42e2", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4200, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4200", jv);
+  delete jv;
+
+  jv = parse_utf8_string ("42e-1", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4.2, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4.2", jv);
+  delete jv;
+
+}
+
+/* FIXME.  */
+
+static void
+test_parse_array ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_ARRAY, jv->get_kind ());
+  json::array *arr = static_cast <json::array *> (jv);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", jv);
+
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_ARRAY, clone->get_kind ());
+  arr = static_cast <json::array *> (clone);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", clone);
+  delete clone;
+
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_object ()
+{
+  char *err = NULL;
+  json::value *jv
+    = parse_utf8_string ("{\"foo\": \"bar\", \"baz\": [42, null]}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  json::object *jo = static_cast <json::object *> (jv);
+
+  json::value *foo_value = jo->get ("foo");
+  ASSERT_TRUE (foo_value != NULL);
+  ASSERT_EQ (JSON_STRING, foo_value->get_kind ());
+  ASSERT_STREQ ("bar", ((json::string *)foo_value)->get_string ());
+
+  json::value *baz_value = jo->get ("baz");
+  ASSERT_TRUE (baz_value != NULL);
+  ASSERT_EQ (JSON_ARRAY, baz_value->get_kind ());
+
+  json::array *baz_array = (json::array *)baz_value;
+  ASSERT_EQ (2, baz_array->get_length ());
+  ASSERT_EQ (42, baz_array->get (0)->as_number ()->get ());
+  ASSERT_EQ (JSON_NULL, baz_array->get (1)->get_kind ());
+
+  // TODO: error-handling
+  // TODO: partial document
+
+  /* We can't use assert_to_str_eq since ordering is not guaranteed.  */
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_OBJECT, clone->get_kind ());
+  ASSERT_EQ (JSON_STRING, clone->as_object ()->get ("foo")->get_kind ());
+  delete clone;
+
+  delete jv;
+}
+
+/* Verify that the literals "true", "false" and "null" are parsed,
+   dumped, and are clonable.  */
+
+static void
+test_parse_literals ()
+{
+  json::value *jv, *clone;
+  char *err = NULL;
+  jv = parse_utf8_string ("true", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_TRUE, jv->get_kind ());
+  assert_to_str_eq ("true", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_TRUE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("false", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_FALSE, jv->get_kind ());
+  assert_to_str_eq ("false", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_FALSE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("null", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_NULL, jv->get_kind ());
+  assert_to_str_eq ("null", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NULL, clone->get_kind ());
+  delete clone;
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_jsonrpc ()
+{
+  char *err = NULL;
+  const char *request
+    = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\","
+       " \"params\": [42, 23], \"id\": 1}");
+  json::value *jv = parse_utf8_string (request, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_parse_empty_object ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("{}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  assert_to_str_eq ("{}", jv);
+  delete jv;
+}
+
+/* FIXME.  */
+
+static void
+test_error_empty_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("", &err);
+  ASSERT_STREQ ("error at index 0: unexpected token: EOF", err);
+  ASSERT_TRUE (jv == NULL);
+  free (err);
+}
+
+/* FIXME.  */
+
+static void
+test_error_missing_comma ()
+{
+  char *err = NULL;
+  /*                  01234567.  */
+  const char *json = "[0, 1 2]";
+  json::value *jv = parse_utf8_string (json, &err);
+  ASSERT_STREQ ("error at index 6: expected ']'; got number",
+		err);
+  // FIXME: unittest the lexer?
+  ASSERT_TRUE (jv == NULL);
+  free (err);
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+json_c_tests ()
+{
+  test_parse_string ();
+  test_parse_number ();
+  test_parse_array ();
+  test_parse_object ();
+  test_parse_literals ();
+  test_parse_jsonrpc ();
+  test_parse_empty_object ();
+  test_error_empty_string ();
+  test_error_missing_comma ();
+
+  /* FIXME: tests for roundtripping (noting that we don't preserve
+     object key ordering).  */
+
+  /* FIXME: cloning.  */
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/json.h b/gcc/json.h
new file mode 100644
index 0000000..aedf84a
--- /dev/null
+++ b/gcc/json.h
@@ -0,0 +1,214 @@
+/* JSON parsing
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_JSON_H
+#define GCC_JSON_H
+
+/* Implementation of JSON, a lightweight data-interchange format.
+
+   See http://www.json.org/
+   and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+   and https://tools.ietf.org/html/rfc7159
+
+   Supports parsing text into a DOM-like tree of json::value *, dumping
+   json::value * to text.  */
+
+namespace json
+{
+
+/* Forward decls of json::value and its subclasses (using indentation
+   to denote inheritance.  */
+
+class value;
+  class object;
+  class array;
+  class number;
+  class string;
+  class literal;
+
+/* An enum for discriminating the subclasses of json::value.  */
+
+enum kind
+{
+  /* class json::object.  */
+  JSON_OBJECT,
+
+  /* class json::array.  */
+  JSON_ARRAY,
+
+  /* class json::number.  */
+  JSON_NUMBER,
+
+  /* class json::string.  */
+  JSON_STRING,
+
+  /* class json::literal uses these three values to identify the
+     particular literal.  */
+  JSON_TRUE,
+  JSON_FALSE,
+  JSON_NULL
+};
+
+/* Base class of JSON value.  */
+
+class value
+{
+ public:
+  virtual ~value () {}
+  virtual enum kind get_kind () const = 0;
+  virtual void print (pretty_printer *pp) const = 0;
+
+  /* Create a deep copy of the value, returning a value which must be
+     deleted by the caller.  */
+  virtual value *clone () const = 0;
+
+  char *to_str () const;
+  void dump (FILE *) const;
+
+  /* Methods for dynamically casting a value to one of the subclasses,
+     returning NULL if the value is of the wrong kind.  */
+  const object *as_object () const;
+  const array *as_array () const;
+  const number *as_number () const;
+  const string *as_string () const;
+
+  /* Convenience accessors for attempting to perform key/value lookups
+     on this value as if it were an json::object.
+
+     On success, return true and write the value to OUT_VALUE.
+     On failure, return false and write an error message to OUT_ERR
+     (which must be freed by the caller).  */
+  bool get_value_by_key (const char *name, const value *&out_value,
+			 char *&out_err) const;
+  bool get_int_by_key (const char *name, int &out_value, char *&out_err) const;
+  bool get_string_by_key (const char *name, const char *&out_value,
+			  char *&out_err) const;
+  bool get_array_by_key (const char *name, const array *&out_value,
+			 char *&out_err) const;
+
+  /* As above, but the key is optional.  THIS must still be an object,
+     though.  */
+  bool get_optional_value_by_key (const char *name, const value *&out_value,
+				  char *&out_err) const;
+  bool get_optional_string_by_key (const char *name, const char *&out_value,
+				   char *&out_err) const;
+};
+
+/* Subclass of value for objects: an unordered collection of
+   key/value pairs.  */
+
+class object : public value
+{
+ public:
+  ~object ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_OBJECT; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  value *get (const char *key) const;
+  value *get_if_nonnull (const char *key) const;
+
+  void set (const char *key, value *v);
+
+ private:
+  typedef hash_map <char *, value *,
+    simple_hashmap_traits<nofree_string_hash, value *> > map_t;
+  map_t m_map;
+};
+
+/* Subclass of value for arrays.  */
+
+class array : public value
+{
+ public:
+  ~array ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_ARRAY; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  unsigned get_length () const { return m_elements.length (); }
+  value *get (int idx) const { return m_elements[idx]; }
+  void append (value *v) { m_elements.safe_push (v); }
+
+ private:
+  auto_vec<value *> m_elements;
+};
+
+/* Subclass of value for numbers.  */
+
+class number : public value
+{
+ public:
+  number (double value) : m_value (value) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_NUMBER; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  double get () const { return m_value; }
+
+ private:
+  double m_value;
+};
+
+/* Subclass of value for strings.  */
+
+class string : public value
+{
+ public:
+  string (const char *utf8) : m_utf8 (xstrdup (utf8)) {}
+  ~string () { free (m_utf8); }
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_STRING; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  const char *get_string () const { return m_utf8; }
+
+ private:
+  char *m_utf8;
+};
+
+/* Subclass of value for the three JSON literals "true", "false",
+   and "null".  */
+
+class literal : public value
+{
+ public:
+  literal (enum kind kind) : m_kind (kind) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return m_kind; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+ private:
+  enum kind m_kind;
+};
+
+/* Declarations for parsing JSON to a json::value * tree.  */
+
+extern value *parse_utf8_string (size_t length, const char *utf8_buf,
+				 char **err_out);
+extern value *parse_utf8_string (const char *utf8, char **err_out);
+
+} // namespace json
+
+#endif  /* GCC_JSON_H  */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 01e3504..a8668f4 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -67,6 +67,7 @@ selftest::run_tests ()
   fibonacci_heap_c_tests ();
   typed_splay_tree_c_tests ();
   blt_c_tests ();
+  json_c_tests ();
 
   /* Mid-level data structures.  */
   input_c_tests ();
diff --git a/gcc/selftest.h b/gcc/selftest.h
index 583bdb2..281658b 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -184,6 +184,7 @@ extern void ggc_tests_c_tests ();
 extern void hash_map_tests_c_tests ();
 extern void hash_set_tests_c_tests ();
 extern void input_c_tests ();
+extern void json_c_tests ();
 extern void pretty_print_c_tests ();
 extern void read_rtl_function_c_tests ();
 extern void rtl_tests_c_tests ();
-- 
1.8.5.3
References:
- [PATCH 00/17] RFC: New source-location representation; Language Server Protocol
  - From: David Malcolm
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]