Browse Source

[5014] Initial JSON parser written in bison/flex implemented (wip)

Tomek Mrugalski 8 years ago
parent
commit
14602ad0d8

+ 28 - 0
src/bin/dhcp6/Makefile.am

@@ -64,6 +64,10 @@ libdhcp6_la_SOURCES += ctrl_dhcp6_srv.cc ctrl_dhcp6_srv.h
 libdhcp6_la_SOURCES += json_config_parser.cc json_config_parser.h
 libdhcp6_la_SOURCES += dhcp6to4_ipc.cc dhcp6to4_ipc.h
 
+libdhcp6_la_SOURCES += dhcp6_lexer.ll location.hh position.hh stack.hh
+libdhcp6_la_SOURCES += dhcp6_parser.cc dhcp6_parser.h
+libdhcp6_la_SOURCES += parser_context.cc parser_context.h
+
 libdhcp6_la_SOURCES += kea_controller.cc
 
 nodist_libdhcp6_la_SOURCES = dhcp6_messages.h dhcp6_messages.cc
@@ -105,3 +109,27 @@ endif
 
 kea_dhcp6dir = $(pkgdatadir)
 kea_dhcp6_DATA = dhcp6.spec
+
+if GENERATE_PARSER
+
+parser: dhcp6_lexer.cc location.hh position.hh stack.hh dhcp6_parser.cc dhcp6_parser.h
+	@echo "Flex/bison files regenerated"
+
+# --- Flex/Bison stuff below --------------------------------------------------
+# When debugging grammar issues, it's useful to add -v to bison parameters.
+# bison will generate parser.output file that explains the whole grammar.
+# It can be used to manually follow what's going on in the parser.
+# This is especially useful if yydebug_ is set to 1 as that variable
+# will cause parser to print out its internal state.
+location.hh position.hh stack.hh dhcp6_parser.cc dhcp6_parser.h: dhcp6_parser.yy
+	$(YACC) --defines=dhcp6_parser.h -o dhcp6_parser.cc dhcp6_parser.yy
+
+dhcp6_lexer.cc: dhcp6_lexer.ll
+	$(LEX) -o dhcp6_lexer.cc dhcp6_lexer.ll
+
+else
+
+parser location.hh position.hh stack.hh dhcp6_parser.cc dhcp6_parser.h dhcp6_lexer.cc:
+	@echo Parser generation disabled. Configure with --enable-generate-parser to enable it.
+
+endif

+ 187 - 0
src/bin/dhcp6/dhcp6_lexer.ll

@@ -0,0 +1,187 @@
+/* Copyright (C) 2015-2016 Internet Systems Consortium, Inc. ("ISC")
+
+   This Source Code Form is subject to the terms of the Mozilla Public
+   License, v. 2.0. If a copy of the MPL was not distributed with this
+   file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+%{ /* -*- C++ -*- */
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
+#include <string>
+#include <dhcp6/parser_context.h>
+#include <asiolink/io_address.h>
+#include <boost/lexical_cast.hpp>
+
+// Work around an incompatibility in flex (at least versions
+// 2.5.31 through 2.5.33): it generates code that does
+// not conform to C89.  See Debian bug 333231
+// <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>.
+# undef yywrap
+# define yywrap() 1
+
+// The location of the current token. The lexer will keep updating it. This
+// variable will be useful for logging errors.
+static isc::dhcp::location loc;
+
+// To avoid the call to exit... oops!
+#define YY_FATAL_ERROR(msg) isc::dhcp::Parser6Context::fatal(msg)
+%}
+
+/* noyywrap disables automatic rewinding for the next file to parse. Since we
+   always parse only a single string, there's no need to do any wraps. And
+   using yywrap requires linking with -lfl, which provides the default yywrap
+   implementation that always returns 1 anyway. */
+%option noyywrap
+
+/* nounput simplifies the lexer, by removing support for putting a character
+   back into the input stream. We never use such capability anyway. */
+%option nounput
+
+/* batch means that we'll never use the generated lexer interactively. */
+%option batch
+
+/* Enables debug mode. To see the debug messages, one needs to also set
+   yy_flex_debug to 1, then the debug messages will be printed on stderr. */
+%option debug
+
+/* noinput tells flex not to generate the input() function, which we never
+   call. Without it the unused function triggers compiler warnings (it was
+   specified in the bison examples and Postgres folks added it to remove
+   gcc 4.3 warnings). */
+%option noinput
+
+/* This line tells flex to track the line numbers. Configuration text
+   usually spans many lines, so line numbers help to pinpoint errors. */
+%option yylineno
+
+/* These are not token expressions yet, just convenience expressions that
+   can be used during actual token definitions. Note some can match
+   incorrect inputs (e.g., IP addresses) which must be checked. */
+int   \-?[0-9]+
+blank [ \t]
+
+UnicodeEscapeSequence                   u[0-9A-Fa-f]{4}
+JSONEscapeCharacter                     ["\\/bfnrt]
+JSONEscapeSequence                      {JSONEscapeCharacter}|{UnicodeEscapeSequence}
+JSONStringCharacter                     [^"\\]|\\{JSONEscapeSequence}
+JSONString                              \"{JSONStringCharacter}*\"
+
+
+%{
+// This code is run each time a pattern is matched. It updates the location
+// by moving it ahead by yyleng bytes. yyleng specifies the length of the
+// currently matched token.
+#define YY_USER_ACTION  loc.columns(yyleng);
+%}
+
+%%
+
+%{
+    // Code run each time yylex is called.
+    loc.step();
+%}
+
+{blank}+   {
+    // Ok, we found whitespace. Let's ignore it and update the loc variable.
+    loc.step();
+}
+[\n]+      {
+    // Newline found. Let's update the location and continue.
+    loc.lines(yyleng);
+    loc.step();
+}
+
+
+{JSONString} {
+    // A string has been matched. It contains the actual string and the
+    // enclosing double quotes. We need to get those quotes out of the way and
+    // just use its content, e.g. for "foo" we should get foo. Escape
+    // sequences inside the string are passed on as they are.
+    std::string tmp(yytext+1);
+    tmp.resize(tmp.size() - 1);
+
+    return isc::dhcp::Dhcp6Parser::make_STRING(tmp, loc);
+}
+
+"["                                     { return isc::dhcp::Dhcp6Parser::make_LSQUARE_BRACKET(loc); }
+"]"                                     { return isc::dhcp::Dhcp6Parser::make_RSQUARE_BRACKET(loc); }
+"{"                                     { return isc::dhcp::Dhcp6Parser::make_LCURLY_BRACKET(loc); }
+"}"                                     { return isc::dhcp::Dhcp6Parser::make_RCURLY_BRACKET(loc); }
+","                                     { return isc::dhcp::Dhcp6Parser::make_COMMA(loc); }
+":"                                     { return isc::dhcp::Dhcp6Parser::make_COLON(loc); }
+
+{int} {
+    // An integer was found.
+    std::string tmp(yytext);
+    int64_t integer = 0;
+    try {
+        // The pattern accepts negative values (e.g. -1) and an arbitrary
+        // number of digits, so values such as 0xffffffff must fit as well.
+        // To cover both of those use cases, we need at least
+        // int64_t.
+        integer = boost::lexical_cast<int64_t>(tmp);
+    } catch (const boost::bad_lexical_cast &) {
+        driver.error(loc, "Failed to convert " + tmp + " to an integer.");
+    }
+
+    // Hand the converted value (not its textual form) over to the parser.
+    return isc::dhcp::Dhcp6Parser::make_INTEGER(integer, loc);
+}
+[-+]?[0-9]*\.?[0-9]*([eE][-+]?[0-9]+)? {
+    // A floating point was found.
+    std::string tmp(yytext);
+    double fp = 0.0;
+    try {
+        // The pattern above is permissive: every part of it is optional, so
+        // it also matches text that is not a number (e.g. a lone "." or
+        // "-"). lexical_cast rejects such input and the error is reported
+        // below.
+        fp = boost::lexical_cast<double>(tmp);
+    } catch (const boost::bad_lexical_cast &) {
+        driver.error(loc, "Failed to convert " + tmp + " to a floating point.");
+    }
+
+    return isc::dhcp::Dhcp6Parser::make_FLOAT(fp, loc);
+}
+
+true|false {
+    string tmp(yytext);
+    return isc::dhcp::Dhcp6Parser::make_BOOLEAN(tmp == "true", loc);
+}
+
+null {
+   return isc::dhcp::Dhcp6Parser::make_NULL_TYPE(loc);
+}
+
+.          driver.error (loc, "Invalid character: " + std::string(yytext));
+<<EOF>>    return isc::dhcp::Dhcp6Parser::make_END(loc);
+%%
+
+using namespace isc::dhcp;
+
+// Prepares the scanner for a new run: resets the location tracker, applies
+// the scanner tracing flag and makes flex read from a copy of string_.
+void
+Parser6Context::scanStringBegin()
+{
+    loc.initialize(&file_);
+    yy_flex_debug = trace_scanning_;
+
+    YY_BUFFER_STATE scan_buffer = yy_scan_bytes(string_.c_str(), string_.size());
+    if (scan_buffer) {
+        return;
+    }
+
+    // fatal() throws an exception, so control never continues past this call.
+    fatal("cannot scan string");
+}
+
+// Releases the buffer flex is currently scanning from.
+void
+Parser6Context::scanStringEnd()
+{
+    YY_BUFFER_STATE current = YY_CURRENT_BUFFER;
+    yy_delete_buffer(current);
+}
+
+namespace {
+/// This class is never used. It exists only so that yy_fatal_error() is
+/// referenced somewhere, which avoids the unused function error.
+class Dummy {
+    // cppcheck-suppress unusedPrivateFunction
+    void dummy() {
+        yy_fatal_error("Fix me: how to disable its definition?");
+    }
+};
+}

+ 113 - 0
src/bin/dhcp6/dhcp6_parser.yy

@@ -0,0 +1,113 @@
+/* Copyright (C) 2015-2016 Internet Systems Consortium, Inc. ("ISC")
+
+   This Source Code Form is subject to the terms of the Mozilla Public
+   License, v. 2.0. If a copy of the MPL was not distributed with this
+   file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+%skeleton "lalr1.cc" /* -*- C++ -*- */
+%require "3.0.0"
+%defines
+%define parser_class_name {Dhcp6Parser}
+%define api.token.constructor
+%define api.value.type variant
+%define api.namespace {isc::dhcp}
+%define parse.assert
+%code requires
+{
+#include <string>
+#include <cc/data.h>
+#include <dhcp/option.h>
+#include <boost/lexical_cast.hpp>
+#include <dhcp6/parser_context_decl.h>
+
+using namespace isc::dhcp;
+using namespace isc::data;
+using namespace std;
+}
+// The parsing context.
+%param { isc::dhcp::Parser6Context& ctx }
+%locations
+%define parse.trace
+%define parse.error verbose
+%code
+{
+#include <dhcp6/parser_context.h>
+
+}
+
+%define api.token.prefix {TOKEN_}
+// Tokens in an order which makes sense and related to the intended use.
+%token
+  END  0  "end of file"
+  COMMA ","
+  COLON ":"
+  LSQUARE_BRACKET "["
+  RSQUARE_BRACKET "]"
+  LCURLY_BRACKET "{"
+  RCURLY_BRACKET "}"
+  NULL_TYPE "null"
+;
+
+%token <std::string> STRING "constant string"
+%token <int64_t> INTEGER "integer"
+%token <double> FLOAT "floating point"
+%token <bool> BOOLEAN "boolean"
+
+%type <ElementPtr> value
+
+
+%printer { yyoutput << $$; } <*>;
+
+%%
+// The whole grammar starts with a map, because the config file
+// consists of Dhcp, Logger and DhcpDdns entries in one big { }.
+%start map;
+
+// Values rule
+//
+// Scalars are converted into the matching Element right away. Maps and lists
+// are built on ctx.stack_ while their content is being parsed (see the map
+// and list rules below); once a nested map or list is complete, it is taken
+// off the stack and becomes the semantic value.
+value : INTEGER { $$ = ElementPtr(new IntElement($1)); }
+     | FLOAT { $$ = ElementPtr(new DoubleElement($1)); }
+     | BOOLEAN { $$ = ElementPtr(new BoolElement($1)); }
+     | STRING { $$ = ElementPtr(new StringElement($1)); }
+     | NULL_TYPE { $$ = ElementPtr(new NullElement()); }
+     | map { $$ = ctx.stack_.back(); ctx.stack_.pop_back(); }
+     | list { $$ = ctx.stack_.back(); ctx.stack_.pop_back(); }
+    ;
+
+
+// A map puts an empty MapElement on top of the stack before its content is
+// parsed, so the entries below always have a container to be stored in.
+map: LCURLY_BRACKET {
+    ctx.stack_.push_back(ElementPtr(new MapElement()));
+ } map_content RCURLY_BRACKET {
+     // The completed map is deliberately left on top of the stack. A nested
+     // map is removed by the value rule. The outermost one (the start
+     // symbol) stays there, so the code that invoked the parser can pick
+     // the result up from ctx.stack_.
+ };
+
+// Assignments rule
+map_content:  { /* do nothing, it's an empty map */ }
+    | not_empty_map
+    ;
+
+// One or more "name": value entries separated by commas. The recursion is
+// on not_empty_map itself, so neither a leading nor a trailing comma is
+// accepted.
+not_empty_map: STRING COLON value {
+        // Map consisting of a single entry.
+        ctx.stack_.back()->set($1, $3);
+    }
+    | not_empty_map COMMA STRING COLON value {
+        // Map followed by , and another entry.
+        ctx.stack_.back()->set($3, $5);
+    }
+    ;
+
+// A list works the same way as a map: an empty ListElement is put on top of
+// the stack first and the elements are appended to it as they are parsed.
+list: LSQUARE_BRACKET {
+    ctx.stack_.push_back(ElementPtr(new ListElement()));
+ } list_content RSQUARE_BRACKET {
+     // The completed list stays on the stack until the value rule takes it.
+ };
+
+list_content: { /* do nothing, it's an empty list */ }
+    | not_empty_list
+    ;
+
+// One or more values separated by commas.
+not_empty_list: value {
+        // List consisting of a single element.
+        ctx.stack_.back()->add($1);
+    }
+    | not_empty_list COMMA value {
+        // List followed by , and another value.
+        ctx.stack_.back()->add($3);
+    }
+    ;
+
+%%
+
+void
+isc::dhcp::Dhcp6Parser::error(const location_type& loc,
+                              const std::string& what)
+{
+    ctx.error(loc, what);
+}

+ 62 - 0
src/bin/dhcp6/parser_context.cc

@@ -0,0 +1,62 @@
+// Copyright (C) 2016 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <dhcp6/parser_context.h>
+#include <dhcp6/dhcp6_parser.h>
+#include <exceptions/exceptions.h>
+#include <cc/data.h>
+#include <boost/lexical_cast.hpp>
+#include <fstream>
+#include <limits>
+
+namespace isc {
+namespace dhcp {
+
+// Tracing of both the scanner and the parser is switched off by default.
+Parser6Context::Parser6Context()
+    : trace_scanning_(false),
+      trace_parsing_(false) {
+}
+
+// Nothing to do here; all members clean up after themselves.
+Parser6Context::~Parser6Context() {
+}
+
+// Parses the given text and returns the element the parser left on top of
+// the stack.
+//
+// Syntax errors are reported by the error() handlers as EvalParseError. The
+// same exception is thrown when the parser returns a non-zero status without
+// having thrown itself. An empty pointer is returned if the parser completed
+// but left no element on the stack.
+isc::data::ConstElementPtr
+Parser6Context::parseString(const std::string& str)
+{
+    file_ = "<string>";
+    string_ = str;
+
+    // Make sure that leftovers of an earlier (possibly failed) run are not
+    // mistaken for the result of this one.
+    stack_.clear();
+
+    scanStringBegin();
+    int res = 0;
+    try {
+        isc::dhcp::Dhcp6Parser parser(*this);
+        parser.set_debug_level(trace_parsing_);
+        res = parser.parse();
+    } catch (...) {
+        // The scanner and the parser report problems by throwing. Release
+        // the scanner buffer before letting the exception through.
+        scanStringEnd();
+        throw;
+    }
+    scanStringEnd();
+
+    if (res != 0) {
+        error("Parser abnormally terminated.");
+    }
+
+    // stack_.end() points past the last element and must not be
+    // dereferenced; the result, if there is any, is the last element.
+    if (stack_.empty()) {
+        return (isc::data::ConstElementPtr());
+    }
+    return (stack_.back());
+}
+
+// Reports a problem found at a known location of the parsed text. The
+// location is prepended to the message and EvalParseError is thrown.
+void
+Parser6Context::error(const isc::dhcp::location& loc, const std::string& what)
+{
+    isc_throw(EvalParseError, loc << ": " << what);
+}
+
+// Reports a problem for which no location is available. Throws
+// EvalParseError carrying just the message.
+void
+Parser6Context::error(const std::string& what)
+{
+    isc_throw(EvalParseError, what);
+}
+
+// Reports a condition that should never happen. Throws Unexpected.
+void
+Parser6Context::fatal(const std::string& what)
+{
+    isc_throw(Unexpected, what);
+}
+
+};
+};

+ 97 - 0
src/bin/dhcp6/parser_context.h

@@ -0,0 +1,97 @@
+// Copyright (C) 2015-2016 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef PARSER_CONTEXT_H
+#define PARSER_CONTEXT_H
+#include <string>
+#include <map>
+#include <vector>
+#include <dhcp6/dhcp6_parser.h>
+#include <dhcp6/parser_context_decl.h>
+#include <exceptions/exceptions.h>
+
+// Tell Flex the lexer's prototype ...
+#define YY_DECL isc::dhcp::Dhcp6Parser::symbol_type yylex (Parser6Context& driver)
+
+// ... and declare it for the parser's sake.
+YY_DECL;
+
+namespace isc {
+namespace dhcp {
+
+/// @brief Evaluation error exception raised when trying to parse an axceptions.
+class EvalParseError : public isc::Exception {
+public:
+    EvalParseError(const char* file, size_t line, const char* what) :
+        isc::Exception(file, line, what) { };
+};
+
+
+/// @brief Parsing context, an interface between the scanner, the parser
+/// and the code that requests the parsing.
+class Parser6Context
+{
+public:
+    /// @brief Default constructor.
+    ///
+    /// Tracing of the scanner and the parser is disabled.
+    Parser6Context();
+
+    /// @brief destructor
+    virtual ~Parser6Context();
+
+    /// @brief JSON elements being parsed.
+    ///
+    /// The grammar actions use it as a stack: the element currently being
+    /// filled in is the last one.
+    std::vector<isc::data::ElementPtr> stack_;
+
+    /// @brief Method called before scanning starts on a string.
+    void scanStringBegin();
+
+    /// @brief Method called after the last tokens are scanned from a string.
+    void scanStringEnd();
+
+    /// @brief Run the parser on the string specified.
+    ///
+    /// @param str string to be parsed
+    /// @return Element produced by the parser.
+    /// @throw EvalParseError when the string is not syntactically correct.
+    isc::data::ConstElementPtr parseString(const std::string& str);
+
+    /// @brief The name of the file being parsed.
+    /// Used later to pass the file name to the location tracker.
+    std::string file_;
+
+    /// @brief The string being parsed.
+    std::string string_;
+
+    /// @brief Error handler
+    ///
+    /// @param loc location within the parsed file when experienced a problem.
+    /// @param what string explaining the nature of the error.
+    /// @throw EvalParseError always.
+    static void error(const isc::dhcp::location& loc, const std::string& what);
+
+    /// @brief Error handler
+    ///
+    /// This is a simplified error reporting tool for cases when
+    /// the location of the problem is not available.
+    ///
+    /// @param what string explaining the nature of the error.
+    /// @throw EvalParseError always.
+    static void error(const std::string& what);
+
+    /// @brief Fatal error handler
+    ///
+    /// This is for errors that should not happen but are fatal when they do.
+    ///
+    /// @param what string explaining the nature of the error.
+    /// @throw Unexpected always.
+    static void fatal(const std::string& what);
+
+ private:
+    /// @brief Flag determining scanner debugging.
+    bool trace_scanning_;
+
+    /// @brief Flag determining parser debugging.
+    bool trace_parsing_;
+};
+
+}; // end of isc::dhcp namespace
+}; // end of isc namespace
+
+#endif

+ 1 - 0
src/bin/dhcp6/tests/Makefile.am

@@ -93,6 +93,7 @@ dhcp6_unittests_SOURCES += dhcp6_message_test.cc dhcp6_message_test.h
 dhcp6_unittests_SOURCES += kea_controller_unittest.cc
 dhcp6_unittests_SOURCES += dhcp6to4_ipc_unittest.cc
 dhcp6_unittests_SOURCES += classify_unittests.cc
+dhcp6_unittests_SOURCES += parser_unittest.cc
 
 nodist_dhcp6_unittests_SOURCES  = marker_file.h test_libraries.h
 

+ 27 - 0
src/bin/dhcp6/tests/parser_unittest.cc

@@ -0,0 +1,27 @@
+// Copyright (C) 2016 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <gtest/gtest.h>
+#include <cc/data.h>
+#include <dhcp6/parser_context.h>
+
+using namespace isc::data;
+using namespace std;
+
+namespace {
+
+// Checks that a minimal configuration (a map holding one empty map) is
+// accepted and that the parser returns an element for it.
+TEST(ParserTest, basic) {
+
+    // Spelled out in full, so the test does not depend on using-directives
+    // that happen to be present in the included headers.
+    isc::dhcp::Parser6Context ctx;
+
+    string txt = "{ \"Dhcp6\": { } }";
+
+    // Parsing problems are reported with exceptions. Turn them into a
+    // regular test failure.
+    ConstElementPtr json;
+    ASSERT_NO_THROW(json = ctx.parseString(txt));
+
+    ASSERT_TRUE(json);
+}
+
+};