Browse Source

[5014_phase2] Implemented string escape (but not unicode) cf ECMA 404

Francis Dupont 8 years ago
parent
commit
652bb17f31
2 changed files with 101 additions and 14 deletions
  1. 83 8
      src/bin/dhcp6/dhcp6_lexer.ll
  2. 18 6
      src/bin/dhcp6/tests/parser_unittest.cc

+ 83 - 8
src/bin/dhcp6/dhcp6_lexer.ll

@@ -84,12 +84,19 @@ unsigned int comment_start_line = 0;
 int   \-?[0-9]+
 blank [ \t]
 
-UnicodeEscapeSequence                   u[0-9A-Fa-f]{4}
-JSONEscapeCharacter                     ["\\/bfnrt]
-JSONEscapeSequence                      {JSONEscapeCharacter}|{UnicodeEscapeSequence}
-JSONStringCharacter                     [^"\\]|\\{JSONEscapeSequence}
-JSONString                              \"{JSONStringCharacter}*\"
+UnicodeEscapeSequence           u[0-9A-Fa-f]{4}
+JSONEscapeCharacter             ["\\/bfnrt]
+JSONEscapeSequence              {JSONEscapeCharacter}|{UnicodeEscapeSequence}
+JSONStandardCharacter           [^\x00-\x1f"\\]
+JSONStringCharacter             {JSONStandardCharacter}|\\{JSONEscapeSequence}
+JSONString                      \"{JSONStringCharacter}*\"
 
+/* for errors */
+
+BadUnicodeEscapeSequence        u[0-9A-Fa-f]{0,3}[^0-9A-Fa-f]
+BadJSONEscapeSequence           [^"\\/bfnrtu]|{BadUnicodeEscapeSequence}
+ControlCharacter                [\x00-\x1f]
+ControlCharacterFill            [^"\\]|\\{JSONEscapeSequence}
 
 %{
 // This code runs each time a pattern is matched. It updates the location
@@ -760,10 +767,78 @@ JSONString                              \"{JSONStringCharacter}*\"
     // A string has been matched. It contains the actual string and double quotes.
     // We need to get those quotes out of the way and just use its content, e.g.
     // for "foo" we should get foo
-    std::string tmp(yytext+1);
-    tmp.resize(tmp.size() - 1);
+    std::string raw(yytext+1);        // copy of the match without the opening quote
+    size_t len = raw.size() - 1;      // length once the closing quote is dropped
+    raw.resize(len);
+    std::string decoded;              // raw with each backslash escape resolved
+    decoded.reserve(len);             // an escape never decodes to more chars than it used
+    for (size_t pos = 0; pos < len; ++pos) {
+        char c = raw[pos];
+        switch (c) {
+        case '"':
+            // impossible condition. NOTE(review): no break follows - confirm error() never returns
+            driver.error(loc, "Bad quote in \"" + raw + "\"");
+        case '\\':
+            ++pos;                    // step onto the character after the backslash
+            if (pos >= len) {
+                // impossible condition (a backslash was the last character)
+                driver.error(loc, "Overflow escape in \"" + raw + "\"");
+            }
+            c = raw[pos];
+            switch (c) {
+            case '"':
+            case '\\':
+            case '/':
+                decoded.push_back(c); // these three escapes stand for themselves
+                break;
+            case 'b':
+                decoded.push_back('\b');
+                break;
+            case 'f':
+                decoded.push_back('\f');
+                break;
+            case 'n':
+                decoded.push_back('\n');
+                break;
+            case 'r':
+                decoded.push_back('\r');
+                break;
+            case 't':
+                decoded.push_back('\t');
+                break;
+            case 'u':
+                // not yet implemented. NOTE(review): no break before default - confirm error() never returns
+                driver.error(loc, "Unsupported unicode escape in \"" + raw + "\"");
+            default:
+                // impossible condition
+                driver.error(loc, "Bad escape in \"" + raw + "\"");
+            }
+            break;
+        default:
+            if (static_cast<unsigned char>(c) < 0x20) { // unsigned: bytes >= 0x80 are not controls
+                // impossible condition
+                driver.error(loc, "Invalid control in \"" + raw + "\"");
+            }
+            decoded.push_back(c);
+        }
+    }
 
-    return isc::dhcp::Dhcp6Parser::make_STRING(tmp, loc);
+    return isc::dhcp::Dhcp6Parser::make_STRING(decoded, loc);
+}
+
+\"{JSONStringCharacter}*{ControlCharacter}{ControlCharacterFill}*\" {
+    // Bad string: a raw control character (0x00-0x1f) appears between the quotes
+    driver.error(loc, "Invalid control in " + std::string(yytext));
+}
+
+\"{JSONStringCharacter}*\\{BadJSONEscapeSequence}[^\x00-\x1f"]*\" {
+    // Bad string: a backslash is followed by something other than a valid JSON escape
+    driver.error(loc, "Bad escape in " + std::string(yytext));
+}
+    
+\"{JSONStringCharacter}*\\\" {
+    // Bad string: a backslash directly precedes the final quote (dangling escape)
+    driver.error(loc, "Overflow escape in " + std::string(yytext));
 }
 
 "["    { return isc::dhcp::Dhcp6Parser::make_LSQUARE_BRACKET(loc); }

+ 18 - 6
src/bin/dhcp6/tests/parser_unittest.cc

@@ -377,22 +377,34 @@ TEST(ParserTest, errors) {
               "<string>:1.3: Invalid character: e");
     testError("\"a\n\tb\"",
               Parser6Context::PARSER_GENERIC_JSON,
-              "<string>:1.1-6: syntax error, unexpected constant string, "
-              "expecting {");
+              "<string>:1.1-6: Invalid control in \"a\n\tb\"");
     testError("\"a\\n\\tb\"",
               Parser6Context::PARSER_GENERIC_JSON,
               "<string>:1.1-8: syntax error, unexpected constant string, "
               "expecting {");
     testError("\"a\\x01b\"",
               Parser6Context::PARSER_GENERIC_JSON,
-              "<string>:1.1: Invalid character: \"");
+              "<string>:1.1-8: Bad escape in \"a\\x01b\"");
     testError("\"a\\u0062\"",
               Parser6Context::PARSER_GENERIC_JSON,
-              "<string>:1.1-9: syntax error, unexpected constant string, "
-              "expecting {");
+              "<string>:1.1-9: Unsupported unicode escape in \"a\\u0062\"");
     testError("\"a\\u062z\"",
               Parser6Context::PARSER_GENERIC_JSON,
-              "<string>:1.1: Invalid character: \"");
+              "<string>:1.1-9: Bad escape in \"a\\u062z\"");
+    testError("\"abc\\\"",
+              Parser6Context::PARSER_GENERIC_JSON,
+              "<string>:1.1-6: Overflow escape in \"abc\\\"");
+
+    // from data_unittest.c
+    testError("\\a",
+              Parser6Context::PARSER_GENERIC_JSON,
+              "<string>:1.1: Invalid character: \\");
+    testError("\\",
+              Parser6Context::PARSER_GENERIC_JSON,
+              "<string>:1.1: Invalid character: \\");
+    testError("\\\"\\\"",
+              Parser6Context::PARSER_GENERIC_JSON,
+              "<string>:1.1: Invalid character: \\");
 
     // want a map
     testError("[]\n",