Browse Source

[4088] Lexer improvements, TokenOption now working

Tomek Mrugalski 9 years ago
parent
commit
5feb3d8fde
3 changed files with 152 additions and 35 deletions
  1. 116 27
      src/lib/eval/lexer.ll
  2. 26 8
      src/lib/eval/tests/context_unittest.cc
  3. 10 0
      src/lib/eval/token.h

+ 116 - 27
src/lib/eval/lexer.ll

@@ -1,10 +1,25 @@
+/* Copyright (C) 2015 Internet Systems Consortium, Inc. ("ISC")
+
+   Permission to use, copy, modify, and/or distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+   REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+   AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+   INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+   LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+   OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+   PERFORMANCE OF THIS SOFTWARE. */
+
 %{ /* -*- C++ -*- */
-# include <cerrno>
-# include <climits>
-# include <cstdlib>
-# include <string>
-# include "eval_context.h"
-# include "parser.h"
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
+#include <string>
+#include <eval/eval_context.h>
+#include <eval/parser.h>
+#include <boost/lexical_cast.hpp>
 
 // Work around an incompatibility in flex (at least versions
 // 2.5.31 through 2.5.33): it generates code that does
@@ -13,16 +28,48 @@
 # undef yywrap
 # define yywrap() 1
 
-// The location of the current token.
+// The location of the current token. The lexer will keep updating it. This
+// variable will be useful for logging errors.
 static isc::eval::location loc;
 %}
-%option noyywrap nounput batch debug noinput
+
+/* noyywrap disables automatic rewinding for the next file to parse. Since we
+   always parse only a single string, there's no need to do any wraps. And
+   using yywrap requires linking with -lfl, which provides the default yywrap
+   implementation that always returns 1 anyway. */
+%option noyywrap
+
+/* nounput simplifies the lexer, by removing support for putting a character
+   back into the input stream. We never use such capability anyway. */
+%option nounput
+
+/* batch means that we'll never use the generated lexer interactively. */
+%option batch
+
+/* Enables debug mode. To see the debug messages, one needs to also set
+   yy_flex_debug to 1, then the debug messages will be printed on stderr. */
+%option debug
+
+/* noinput tells flex not to generate the input() routine, which this lexer
+   never calls. The bison examples specify it and Postgres folks added it to
+   remove gcc 4.3 warnings (about the unused function), so we keep it too. */
+%option noinput
+
+/* This line tells flex to track the line numbers. It's not really that
+   useful for client classes, which typically are one-liners, but it may be
+   useful in more complex cases. */
+%option yylineno
+
+/* These are not token expressions yet, just convenience expressions that
+   can be used during actual token definitions. */
 int   [0-9]+
 blank [ \t]
 str [a-zA-Z_0-9]*
 
 %{
-    // Code run each time a pattern is matched.
+// This code runs each time a pattern is matched. It updates the location
+// by moving it ahead by yyleng bytes. yyleng specifies the length of the
+// currently matched token.
 #define YY_USER_ACTION  loc.columns(yyleng);
 %}
 
@@ -33,12 +80,20 @@ str [a-zA-Z_0-9]*
     loc.step();
 %}
 
-{blank}+   loc.step();
-[\n]+      loc.lines(yyleng); loc.step();
+{blank}+   {
+    // Ok, we found a whitespace. Let's ignore it and update loc variable.
+    loc.step();
+}
+[\n]+      {
+    // Newline found. Let's update the location and continue.
+    loc.lines(yyleng);
+    loc.step();
+}
 
 \'{str}\' {
-    // This is a string. It contains the actual string and single quotes.
-    // We need to get those quotes out of the way.
+    // A string has been matched. It contains the actual string and single quotes.
+    // We need to get those quotes out of the way and just use its content, e.g.
+    // for 'foo' we should get foo
     std::string tmp(yytext+1);
     tmp.resize(tmp.size() - 1);
 
@@ -46,26 +101,60 @@ str [a-zA-Z_0-9]*
 }
 
 option\[{int}\] {
-    long n = strtol(yytext, NULL, 10);
-    /// @todo: Sanity check n
-    if (n<0 || n>65535) {
-        driver.error(loc, "Option code has invalid values. Allowed range: 0..65535");
+    // option[123] token found. Let's see if the numeric value can be
+    // converted to integer and if it has a reasonable value.
+    // yytext contains the whole expression (e.g. option[123]). We need
+    // to trim it down to just the code, which will be transformed to
+    // integer.
+    std::string tmp(yytext);
+
+    // Sanity check if the token is at least 9 (strlen("option[X]")) long.
+    // This should never happen as it would indicate a flex bug.
+    if (tmp.length() < 9) {
+        driver.error(loc, "The string matched (" + tmp + ") is too short,"
+                     " expected at least 9 (option[X]) characters");
     }
+    size_t pos = tmp.find("[");
+    if (pos == std::string::npos) {
+        driver.error(loc, "The string matched (" + tmp + ") is invalid,"
+                     " as it does not contain opening bracket.");
+    }
+    // Let's get rid of all the text before [, including [.
+    tmp = tmp.substr(pos + 1);
 
-    return isc::eval::EvalParser::make_OPTION(n, loc);
-}
+    pos = tmp.find("]");
+    if (pos == std::string::npos) {
+        driver.error(loc, "The string matched (" + tmp + ") is invalid,"
+                     " as it does not contain closing bracket.");
+    }
+    tmp = tmp.substr(0, pos);
+
+    // Convert into a plain int first. Storing the result directly in a
+    // uint16_t would silently wrap out-of-range codes (e.g. 70000 would
+    // become 4464) and the range check below could never fail.
+    int n = 0;
+    try {
+        n = boost::lexical_cast<int>(tmp);
+    } catch (const boost::bad_lexical_cast &) {
+        driver.error(loc, "Failed to convert specified option code to "
+                     "number ('" + tmp + "' in expression " +
+                     std::string(yytext) + ")");
+    }
 
-"==" {
-    return isc::eval::EvalParser::make_EQUAL(loc);
-}
+    // 65535 is the maximum value of the option code in DHCPv6. We want the
+    // code to be the same for v4 and v6, so let's ignore for a moment that
+    // max. option code in DHCPv4 is 255.
+    /// @todo: Maybe add a flag somewhere in EvalContext to indicate if we're
+    /// running in v4 (allowed max 255) or v6 (allowed max 65535).
+    if (n<0 || n>65535) {
+        driver.error(loc, "Option code has invalid values:[" +
+                     std::string(yytext) + "]. Allowed range: 0..65535");
+    }
 
-"substring" {
-    return isc::eval::EvalParser::make_SUBSTRING(loc);
+    // n has been range-checked above, so narrowing to uint16_t is lossless.
+    return isc::eval::EvalParser::make_OPTION(static_cast<uint16_t>(n), loc);
 }
 
-"("      return isc::eval::EvalParser::make_LPAREN(loc);
-")"      return isc::eval::EvalParser::make_RPAREN(loc);
-","      return isc::eval::EvalParser::make_COMA(loc);
+"=="        return isc::eval::EvalParser::make_EQUAL(loc);
+"substring" return isc::eval::EvalParser::make_SUBSTRING(loc);
+"("         return isc::eval::EvalParser::make_LPAREN(loc);
+")"         return isc::eval::EvalParser::make_RPAREN(loc);
+","         return isc::eval::EvalParser::make_COMA(loc);
 
 .          driver.error (loc, "invalid character");
 <<EOF>>    return isc::eval::EvalParser::make_END(loc);

+ 26 - 8
src/lib/eval/tests/context_unittest.cc

@@ -29,9 +29,10 @@ namespace {
 
 class EvalContextTest : public ::testing::Test {
 public:
-    void checkStringToken(const TokenPtr& token, const std::string& expected) {
+    void checkTokenString(const TokenPtr& token, const std::string& expected) {
         ASSERT_TRUE(token);
-        boost::shared_ptr<TokenString> str = boost::dynamic_pointer_cast<TokenString>(token);
+        boost::shared_ptr<TokenString> str =
+            boost::dynamic_pointer_cast<TokenString>(token);
         ASSERT_TRUE(str);
 
         Pkt4Ptr pkt4(new Pkt4(DHCPDISCOVER, 12345));
@@ -44,12 +45,21 @@ public:
         EXPECT_EQ(expected, values.top());
     }
 
-    void checkEqToken(const TokenPtr& token) {
+    void checkTokenEq(const TokenPtr& token) { // verifies token is a TokenEqual
         ASSERT_TRUE(token); // a null token pointer is a fatal failure
-        boost::shared_ptr<TokenEqual> eq = boost::dynamic_pointer_cast<TokenEqual>(token);
+        boost::shared_ptr<TokenEqual> eq =
+            boost::dynamic_pointer_cast<TokenEqual>(token);
         EXPECT_TRUE(eq); // the cast yields null unless token is a TokenEqual
     }
 
+    void checkTokenOption(const TokenPtr& token, uint16_t expected_option) { // verifies token is a TokenOption carrying expected_option
+        ASSERT_TRUE(token); // a null token pointer is a fatal failure
+        boost::shared_ptr<TokenOption> opt =
+            boost::dynamic_pointer_cast<TokenOption>(token);
+        ASSERT_TRUE(opt); // must not be null, since opt is dereferenced below
+
+        EXPECT_EQ(expected_option, opt->getCode()); // compare against the code stored in the token
+    }
 };
 
 TEST_F(EvalContextTest, basic) {
@@ -68,7 +78,7 @@ TEST_F(EvalContextTest, string) {
 
     TokenPtr tmp = eval.expression.at(0);
 
-    checkStringToken(tmp, "foo");
+    checkTokenString(tmp, "foo");
 }
 
 TEST_F(EvalContextTest, equal) {
@@ -82,9 +92,17 @@ TEST_F(EvalContextTest, equal) {
     TokenPtr tmp2 = eval.expression.at(1);
     TokenPtr tmp3 = eval.expression.at(2);
 
-    checkStringToken(tmp1, "foo");
-    checkStringToken(tmp2, "bar");
-    checkEqToken(tmp3);
+    checkTokenString(tmp1, "foo");
+    checkTokenString(tmp2, "bar");
+    checkTokenEq(tmp3);
+}
+
+TEST_F(EvalContextTest, option) { // checks that "option[123]" parses into one TokenOption
+    EvalContext eval;
+
+    EXPECT_NO_THROW(eval.parseString("option[123]")); // parsing must not throw
+    ASSERT_EQ(1, eval.expression.size()); // exactly one token is expected
+    checkTokenOption(eval.expression.at(0), 123); // and it must carry option code 123
 }
 
 };

+ 10 - 0
src/lib/eval/token.h

@@ -130,6 +130,16 @@ public:
     /// @param values value of the option will be pushed here (or "")
     void evaluate(const Pkt& pkt, ValueStack& values);
 
+    /// @brief Returns the option code held by this token
+    ///
+    /// This method is used in testing to determine whether the parser has
+    /// instantiated TokenOption with the correct parameters.
+    ///
+    /// @return option-code of the option this token expects to extract.
+    uint16_t getCode() const {
+        return (option_code_);
+    }
+
 private:
     uint16_t option_code_; ///< code of the option to be extracted
 };