Parcourir la source

[5085] Implemented \u00xy escapes in toJSON and lexer

Francis Dupont il y a 8 ans
Parent
commit
757d1f48cb
3 fichiers modifiés avec 90 ajouts et 9 suppressions
  1. 37 2
      src/bin/dhcp6/dhcp6_lexer.ll
  2. 43 6
      src/bin/dhcp6/tests/parser_unittest.cc
  3. 10 1
      src/lib/cc/data.cc

+ 37 - 2
src/bin/dhcp6/dhcp6_lexer.ll

@@ -926,6 +926,7 @@ ControlCharacterFill            [^"\\]|\\{JSONEscapeSequence}
     std::string decoded;
     decoded.reserve(len);
     for (size_t pos = 0; pos < len; ++pos) {
+        int b = 0;
         char c = raw[pos];
         switch (c) {
         case '"':
@@ -960,8 +961,42 @@ ControlCharacterFill            [^"\\]|\\{JSONEscapeSequence}
                 decoded.push_back('\t');
                 break;
             case 'u':
-                // not yet implemented
-                driver.error(driver.loc_, "Unsupported unicode escape in \"" + raw + "\"");
+                // support only \u0000 to \u00ff
+                ++pos;
+                if (pos + 4 > len) {
+                    // impossible condition
+                    driver.error(driver.loc_,
+                                 "Overflow unicode escape in \"" + raw + "\"");
+                }
+                if ((raw[pos] != '0') || (raw[pos + 1] != '0')) {
+                    driver.error(driver.loc_, "Unsupported unicode escape in \"" + raw + "\"");
+                }
+                pos += 2;
+                c = raw[pos];
+                if ((c >= '0') && (c <= '9')) {
+                    b = (c - '0') << 4;
+                } else if ((c >= 'A') && (c <= 'F')) {
+                    b = (c - 'A' + 10) << 4;
+                } else if ((c >= 'a') && (c <= 'f')) {
+                    b = (c - 'a' + 10) << 4;
+                } else {
+                    // impossible condition
+                    driver.error(driver.loc_, "Not hexadecimal in unicode escape in \"" + raw + "\"");
+                }
+                pos++;
+                c = raw[pos];
+                if ((c >= '0') && (c <= '9')) {
+                    b |= c - '0';
+                } else if ((c >= 'A') && (c <= 'F')) {
+                    b |= c - 'A' + 10;
+                } else if ((c >= 'a') && (c <= 'f')) {
+                    b |= c - 'a' + 10;
+                } else {
+                    // impossible condition
+                    driver.error(driver.loc_, "Not hexadecimal in unicode escape in \"" + raw + "\"");
+                }
+                decoded.push_back(static_cast<char>(b & 0xff));
+                break;
             default:
                 // impossible condition
                 driver.error(driver.loc_, "Bad escape in \"" + raw + "\"");

+ 43 - 6
src/bin/dhcp6/tests/parser_unittest.cc

@@ -31,8 +31,8 @@ void testParser(const std::string& txt, Parser6Context::ParserType parser_type)
     ASSERT_NO_THROW(reference_json = Element::fromJSON(txt, true));
     ASSERT_NO_THROW({
             try {
-        Parser6Context ctx;
-        test_json = ctx.parseString(txt, parser_type);
+                Parser6Context ctx;
+                test_json = ctx.parseString(txt, parser_type);
             } catch (const std::exception &e) {
                 cout << "EXCEPTION: " << e.what() << endl;
                 throw;
@@ -49,8 +49,8 @@ void testParser2(const std::string& txt, Parser6Context::ParserType parser_type)
 
     ASSERT_NO_THROW({
             try {
-        Parser6Context ctx;
-        test_json = ctx.parseString(txt, parser_type);
+                Parser6Context ctx;
+                test_json = ctx.parseString(txt, parser_type);
             } catch (const std::exception &e) {
                 cout << "EXCEPTION: " << e.what() << endl;
                 throw;
@@ -386,9 +386,9 @@ TEST(ParserTest, errors) {
     testError("\"a\\x01b\"",
               Parser6Context::PARSER_JSON,
               "<string>:1.1-8: Bad escape in \"a\\x01b\"");
-    testError("\"a\\u0062\"",
+    testError("\"a\\u0162\"",
               Parser6Context::PARSER_JSON,
-              "<string>:1.1-9: Unsupported unicode escape in \"a\\u0062\"");
+              "<string>:1.1-9: Unsupported unicode escape in \"a\\u0162\"");
     testError("\"a\\u062z\"",
               Parser6Context::PARSER_JSON,
               "<string>:1.1-9: Bad escape in \"a\\u062z\"");
@@ -476,4 +476,41 @@ TEST(ParserTest, errors) {
               "\"preferred_lifetime\" in Dhcp6 map.");
 }
 
+// Check unicode escapes: toJSON output must be parsed back to the same string
+TEST(ParserTest, unicodeEscapes) {
+    ConstElementPtr result;
+    string json;
+
+    // check we can reread output for each byte value (int index: no char wrap)
+    for (int i = -128; i < 128; ++i) {
+        // one character string: index 0 is the only one which may be set
+        string ins(1, static_cast<char>(i));
+        ConstElementPtr e(new StringElement(ins));
+        json = e->str();
+        ASSERT_NO_THROW(
+        try {
+            Parser6Context ctx;
+            result = ctx.parseString(json, Parser6Context::PARSER_JSON);
+        } catch (const std::exception &x) {
+            cout << "EXCEPTION: " << x.what() << endl;
+            throw;
+        });
+        ASSERT_EQ(Element::string, result->getType());
+        EXPECT_EQ(ins, result->stringValue());
+    }
+
+    // check the 4 possible encodings of solidus '/'
+    json = "\"/\\/\\u002f\\u002F\"";
+    ASSERT_NO_THROW(
+    try {
+        Parser6Context ctx;
+        result = ctx.parseString(json, Parser6Context::PARSER_JSON);
+    } catch (const std::exception &x) {
+        cout << "EXCEPTION: " << x.what() << endl;
+        throw;
+    });
+    ASSERT_EQ(Element::string, result->getType());
+    EXPECT_EQ("////", result->stringValue());
+}
+
 };

+ 10 - 1
src/lib/cc/data.cc

@@ -14,6 +14,7 @@
 #include <map>
 #include <cstdio>
 #include <iostream>
+#include <iomanip>
 #include <string>
 #include <sstream>
 #include <fstream>
@@ -795,7 +796,15 @@ StringElement::toJSON(std::ostream& ss) const {
             ss << '\\' << 't';
             break;
         default:
-            ss << c;
+            if ((c >= 0) && (c < 0x20)) {
+                ss << "\\u"
+                   << hex
+                   << setw(4)
+                   << setfill('0')
+                   << (static_cast<unsigned>(c) & 0xff);
+            } else {
+                ss << c;
+            }
         }
     }
     ss << "\"";