8 years ago · 995f389acf
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,15 @@
 
				-1213.	[func]		tomek
			
 
				+1214.	[func]		tomek
			
 
				 	Bison parser implemented for Control-agent. The code is able
			
 
				 	to syntactically parse input configuration, but the output
			
 
				 	is not used yet.
			
 
				 	(Trac #5076, git d99048aa5b90efa7812a75cdae98a0913470f5a6)
			
 
				 
			
 
				+1213.	[bug]		fdupont
			
 
				+	Option string values containing comma can now be specified
			
 
				+	correctly by preceding comma with double backslashes (e.g.
			
 
				+	"foo\\,bar").
			
 
				+	(Trac #5105, git fa79ac2396aa94d7bac91bd12d3593ebaaa9386d)
			
 
				+
			
 
				 1212.	[doc]		andreipavelQ
			
 
				 	Many spelling corrections.
			
 
				 	(Github #47, git a6a7ca1ced8c63c1e11ef4c572f09272340afdd7)
			
--- a/doc/examples/kea4/multiple-options.json
+++ b/doc/examples/kea4/multiple-options.json
@@ -27,10 +27,10 @@
 
				 #  "renew-timer": 1000,
			
 
				 #  "rebind-timer": 2000,
			
 
				 
			
 
				-# Defining a subnet. There are 3 DHCP options returned to the
			
 
				-# clients connected to this subnet. The first two options are
			
 
				-# identified by the name. The third option is identified by the
			
 
				-# option code.
			
 
				+// Defining a subnet. There are 3 DHCP options returned to the
			
 
				+// clients connected to this subnet. The first and third options are
			
 
				+// identified by the name. The third option is identified by the
			
 
				+// option code.
			
 
				   "subnet4": [
			
 
				     {
			
 
				        "pools": [ { "pool":  "192.0.2.10 - 192.0.2.200" } ],
			
@@ -46,8 +46,22 @@
 
				              "data": "192.0.2.1"
			
 
				          },
			
 
				          {
			
 
				-             "code": 15,
			
 
				-             "data": "example.org"
			
 
				+             // String options that have a comma in their values need to have
			
 
				+             // it escaped (i.e. each comma is predeced by two backslashes).
			
 
				+             // That's because commas are reserved for separating fields in
			
 
				+             // compound options. At the same time, we need to be conformant
			
 
				+             // with JSON spec, that does not allow "\,". Therefore the
			
 
				+             // slightly uncommon double backslashes notation is needed.
			
 
				+             "name": "boot-file-name",
			
 
				+             "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00"
			
 
				+
			
 
				+             // Legal JSON escapes are \ followed by "\/bfnrt character
			
 
				+             // or \u followed by 4 hexa-decimal numbers (currently Kea
			
 
				+             // supports only \u0000 to \u00ff code points).
			
 
				+             // CSV processing translates '\\' into '\' and '\,' into ','
			
 
				+             // only so for instance '\x' is translated into '\x'. But
			
 
				+             // as it works on a JSON string value each of these '\'
			
 
				+             // characters must be doubled on JSON input.
			
 
				          }
			
 
				        ]
			
 
				     } 
			
--- a/doc/examples/kea6/multiple-options.json
+++ b/doc/examples/kea6/multiple-options.json
@@ -28,7 +28,7 @@
 
				   "renew-timer": 1000,
			
 
				   "rebind-timer": 2000,
			
 
				 
			
 
				-# Defining a subnet. There are 2 DHCP options returned to the
			
 
				+# Defining a subnet. There are 3 DHCP options returned to the
			
 
				 # clients connected to this subnet. The first option is identified
			
 
				 # by the name. The second option is identified by the code.
			
 
				 # There are two address pools defined within this subnet. Pool
			
@@ -51,6 +51,24 @@
 
				         {
			
 
				             "code": 12,
			
 
				             "data": "2001:db8:1:0:ff00::1"
			
 
				+        },
			
 
				+        {
			
 
				+            // String options that have a comma in their values need to have
			
 
				+            // it escaped (i.e. each comma is predeced by two backslashes).
			
 
				+            // That's because commas are reserved for separating fields in
			
 
				+            // compound options. At the same time, we need to be conformant
			
 
				+            // with JSON spec, that does not allow "\,". Therefore the
			
 
				+            // slightly uncommon double backslashes notation is needed.
			
 
				+            "name": "new-posix-timezone",
			
 
				+            "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00"
			
 
				+
			
 
				+            // Legal JSON escapes are \ followed by "\/bfnrt character
			
 
				+            // or \u followed by 4 hexa-decimal numbers (currently Kea
			
 
				+            // supports only \u0000 to \u00ff code points).
			
 
				+            // CSV processing translates '\\' into '\' and '\,' into ','
			
 
				+            // only so for instance '\x' is translated into '\x'. But
			
 
				+            // as it works on a JSON string value each of these '\'
			
 
				+            // characters must be doubled on JSON input.
			
 
				         }
			
 
				       ],
			
 
				       "pools": [
			
@@ -64,7 +82,7 @@
 
				             ]
			
 
				         },
			
 
				         {
			
 
				-            "pool": "2001:db8:1::500 - 2001:db8:2::1000"
			
 
				+            "pool": "2001:db8:1::500 - 2001:db8:1::1000"
			
 
				         }
			
 
				       ],
			
 
				       "pd-pools": [
			
--- a/doc/guide/dhcp4-srv.xml
+++ b/doc/guide/dhcp4-srv.xml
@@ -1016,6 +1016,36 @@ temporarily override a list of interface names and listen on all interfaces.
 
				         structures. "Type" designates the format of the data: the meanings of
			
 
				         the various types is given in <xref linkend="dhcp-types"/>.
			
 
				       </para>
			
 
				+
			
 
				+      <para>When a data field is a string, and that string contains the comma
			
 
				+      (,; U+002C) character, the comma must be escaped with a double reverse solidus
			
 
				+      character (\; U+005C). This double escape is required, because both the
			
 
				+      routine splitting CSV data into fields and JSON use the same escape
			
 
				+      character: a single escape (\,) would make the JSON invalid.
			
 
				+      For example, the string &quot;foo,bar&quot; would be represented as:
			
 
				+      <screen>
			
 
				+"Dhcp4": {
			
 
				+    "subnet4": [
			
 
				+        {
			
 
				+            "pools": [
			
 
				+                {
			
 
				+                    <userinput>"option-data": [
			
 
				+                        {
			
 
				+                            "name": "boot-file-name",
			
 
				+                            "data": "foo\\,bar"
			
 
				+                        }
			
 
				+                    ]</userinput>
			
 
				+                },
			
 
				+                ...
			
 
				+            ],
			
 
				+            ...
			
 
				+        },
			
 
				+        ...
			
 
				+    ],
			
 
				+    ...
			
 
				+}
			
 
				+</screen>
			
 
				+      </para>
			
 
				       <para>
			
 
				         Some options are designated as arrays, which means that more than one
			
 
				         value is allowed in such an option. For example the option time-servers
			
--- a/doc/guide/dhcp6-srv.xml
+++ b/doc/guide/dhcp6-srv.xml
@@ -1050,6 +1050,37 @@ temporarily override a list of interface names and listen on all interfaces.
 
				       which was not assigned by IANA) are listed in
			
 
				       <xref linkend="dhcp6-exp-options-list"/>.
			
 
				     </para>
			
 
				+    <para>When a data field is a string, and that string contains
			
 
				+    the comma (,; U+002C) character, the comma must be escaped with a
			
 
				+    reverse solidus character (\; U+005C). This double escape is
			
 
				+    required, because both the routine splitting CSV data into fields
			
 
				+    and JSON use the same escape character: a single escape (\,) would
			
 
				+    make the JSON invalid. For example, the string
			
 
				+    &quot;EST5EDT4,M3.2.0/02:00,M11.1.0/02:00&quot; would be
			
 
				+    represented as:
			
 
				+<screen>
			
 
				+"Dhcp6": {
			
 
				+    "subnet6": [
			
 
				+        {
			
 
				+            "pools": [
			
 
				+                {
			
 
				+                    <userinput>"option-data": [
			
 
				+                        {
			
 
				+                            "name": "new-posix-timezone",
			
 
				+                            "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00"
			
 
				+                        }
			
 
				+                    ]</userinput>
			
 
				+                },
			
 
				+                ...
			
 
				+            ],
			
 
				+            ...
			
 
				+        },
			
 
				+        ...
			
 
				+    ],
			
 
				+    ...
			
 
				+}
			
 
				+</screen>
			
 
				+    </para>
			
 
				     <para>
			
 
				       Some options are designated as arrays, which means that more than one
			
 
				       value is allowed in such an option. For example the option dns-servers
			
--- a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc
+++ b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc
@@ -526,7 +526,10 @@ OptionDataParser::createOption(ConstElementPtr option_data) {
 
				         // separated values then we need to split this string into
			
 
				         // individual values - each value will be used to initialize
			
 
				         // one data field of an option.
			
 
				-        data_tokens = isc::util::str::tokens(data_param, ",");
			
 
				+        // It is the only usage of the escape option: this allows
			
 
				+        // to embed commas in individual values and to return
			
 
				+        // for instance a string value with embedded commas.
			
 
				+        data_tokens = isc::util::str::tokens(data_param, ",", true);
			
 
				 
			
 
				     } else {
			
 
				         // Otherwise, the option data is specified as a string of
			
--- a/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc
+++ b/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc
@@ -11,6 +11,7 @@
 
				 #include <dhcp/option.h>
			
 
				 #include <dhcp/option_custom.h>
			
 
				 #include <dhcp/option_int.h>
			
 
				+#include <dhcp/option_string.h>
			
 
				 #include <dhcp/option6_addrlst.h>
			
 
				 #include <dhcp/tests/iface_mgr_test_config.h>
			
 
				 #include <dhcpsrv/cfgmgr.h>
			
@@ -1210,6 +1211,37 @@ TEST_F(ParseConfigTest, optionDataNoSubOpion) {
 
				     ASSERT_EQ(0, opt->getOptions().size());
			
 
				 }
			
 
				 
			
 
				+// This tests option-data in CSV format and embedded commas.
			
 
				+TEST_F(ParseConfigTest, commaCSVFormatOptionData) {
			
 
				+
			
 
				+    // Configuration string.
			
 
				+    std::string config =
			
 
				+        "{ \"option-data\": [ {"
			
 
				+        "     \"csv-format\": true,"
			
 
				+        "     \"code\": 41,"
			
 
				+        "     \"data\": \"EST5EDT4\\\\,M3.2.0/02:00\\\\,M11.1.0/02:00\","
			
 
				+        "     \"space\": \"dhcp6\""
			
 
				+        " } ]"
			
 
				+        "}";
			
 
				+
			
 
				+    // Verify that the configuration string parses.
			
 
				+    int rcode = parseConfiguration(config, true);
			
 
				+    ASSERT_EQ(0, rcode);
			
 
				+
			
 
				+    // Verify that the option can be retrieved.
			
 
				+    OptionPtr opt = getOptionPtr(DHCP6_OPTION_SPACE, 41);
			
 
				+    ASSERT_TRUE(opt);
			
 
				+
			
 
				+    // Get the option as an option string.
			
 
				+    OptionStringPtr opt_str = boost::dynamic_pointer_cast<OptionString>(opt);
			
 
				+    ASSERT_TRUE(opt_str);
			
 
				+
			
 
				+
			
 
				+    // Verify that the option data is correct.
			
 
				+    string val = "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00";
			
 
				+    EXPECT_EQ(val, opt_str->getValue());
			
 
				+}
			
 
				+
			
 
				 /// The next set of tests check basic operation of the HooksLibrariesParser.
			
 
				 //
			
 
				 // Convenience function to set a configuration of zero or more hooks
			
--- a/src/lib/util/strutil.cc
+++ b/src/lib/util/strutil.cc
@@ -1,4 +1,4 @@
 
				-// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC")
			
 
				+// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
			
 
				 //
			
 
				 // This Source Code Form is subject to the terms of the Mozilla Public
			
 
				 // License, v. 2.0. If a copy of the MPL was not distributed with this
			
@@ -61,29 +61,68 @@ trim(const string& instring) {
 
				 // another dependency on a Boost library.
			
 
				 
			
 
				 vector<string>
			
 
				-tokens(const std::string& text, const std::string& delim) {
			
 
				+tokens(const std::string& text, const std::string& delim, bool escape) {
			
 
				     vector<string> result;
			
 
				-
			
 
				-    // Search for the first non-delimiter character
			
 
				-    size_t start = text.find_first_not_of(delim);
			
 
				-    while (start != string::npos) {
			
 
				-
			
 
				-        // Non-delimiter found, look for next delimiter
			
 
				-        size_t end = text.find_first_of(delim, start);
			
 
				-        if (end != string::npos) {
			
 
				-
			
 
				-            // Delimiter found, so extract string & search for start of next
			
 
				-            // non-delimiter segment.
			
 
				-            result.push_back(text.substr(start, (end - start)));
			
 
				-            start = text.find_first_not_of(delim, end);
			
 
				-
			
 
				+    string token;
			
 
				+    bool in_token = false;
			
 
				+    bool escaped = false;
			
 
				+    for (auto c = text.cbegin(); c != text.cend(); ++c) {
			
 
				+        if (delim.find(*c) != string::npos) {
			
 
				+            // Current character is a delimiter
			
 
				+            if (!in_token) {
			
 
				+                // Two or more delimiters, eat them
			
 
				+            } else if (escaped) {
			
 
				+                // Escaped delimiter in a token: reset escaped and keep it
			
 
				+                escaped = false;
			
 
				+                token.push_back(*c);
			
 
				+            } else {
			
 
				+                // End of the current token: save it if not empty
			
 
				+                if (!token.empty()) {
			
 
				+                    result.push_back(token);
			
 
				+                }
			
 
				+                // Reset state
			
 
				+                in_token = false;
			
 
				+                token.clear();
			
 
				+            }
			
 
				+        } else if (escape && (*c == '\\')) {
			
 
				+            // Current character is the escape character
			
 
				+            if (!in_token) {
			
 
				+                // The escape character is the first character of a new token
			
 
				+                in_token = true;
			
 
				+            }
			
 
				+            if (escaped) {
			
 
				+                // Escaped escape: reset escaped and keep one character
			
 
				+                escaped = false;
			
 
				+                token.push_back(*c);
			
 
				+            } else {
			
 
				+                // Remember to keep the next character
			
 
				+                escaped = true;
			
 
				+            }
			
 
				         } else {
			
 
				-
			
 
				-            // End of string found, extract rest of string and flag to exit
			
 
				-            result.push_back(text.substr(start));
			
 
				-            start = string::npos;
			
 
				+            // Not a delimiter nor an escape
			
 
				+            if (!in_token) {
			
 
				+                // First character of a new token
			
 
				+                in_token = true;
			
 
				+            }
			
 
				+            if (escaped) {
			
 
				+                // Escaped common character: as escape was false
			
 
				+                escaped = false;
			
 
				+                token.push_back('\\');
			
 
				+                token.push_back(*c);
			
 
				+            } else {
			
 
				+                // The common case: keep it
			
 
				+                token.push_back(*c);
			
 
				+            }
			
 
				         }
			
 
				     }
			
 
				+    // End of input: close and save the current token if not empty
			
 
				+    if (escaped) {
			
 
				+        // Pending escape
			
 
				+        token.push_back('\\');
			
 
				+    }
			
 
				+    if (!token.empty()) {
			
 
				+        result.push_back(token);
			
 
				+    }
			
 
				 
			
 
				     return (result);
			
 
				 }
			
--- a/src/lib/util/strutil.h
+++ b/src/lib/util/strutil.h
@@ -1,4 +1,4 @@
 
				-// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC")
			
 
				+// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
			
 
				 //
			
 
				 // This Source Code Form is subject to the terms of the Mozilla Public
			
 
				 // License, v. 2.0. If a copy of the MPL was not distributed with this
			
@@ -69,6 +69,8 @@ std::string trim(const std::string& instring);
 
				 /// invisible leading and trailing delimiter characters.  Therefore both cases
			
 
				 /// reduce to a set of contiguous delimiters, which are considered a single
			
 
				 /// delimiter (so getting rid of the string).
			
 
				+/// Optional escape allows to escape delimiter characters (and *only* them
			
 
				+/// and the escape character itself) using backslash.
			
 
				 ///
			
 
				 /// We could use Boost for this, but this (simple) function eliminates one
			
 
				 /// dependency in the code.
			
@@ -76,10 +78,12 @@ std::string trim(const std::string& instring);
 
				 /// \param text String to be split.  Passed by value as the internal copy is
			
 
				 /// altered during the processing.
			
 
				 /// \param delim Delimiter characters
			
 
				+/// \param escape Use backslash to escape delimiter characters
			
 
				 ///
			
 
				 /// \return Vector of tokens.
			
 
				 std::vector<std::string> tokens(const std::string& text,
			
 
				-        const std::string& delim = std::string(" \t\n"));
			
 
				+        const std::string& delim = std::string(" \t\n"),
			
 
				+        bool escape = false);
			
 
				 
			
 
				 
			
 
				 /// \brief Uppercase Character
			
--- a/src/lib/util/tests/strutil_unittest.cc
+++ b/src/lib/util/tests/strutil_unittest.cc
@@ -1,4 +1,4 @@
 
				-// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC")
			
 
				+// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
			
 
				 //
			
 
				 // This Source Code Form is subject to the terms of the Mozilla Public
			
 
				 // License, v. 2.0. If a copy of the MPL was not distributed with this
			
@@ -143,6 +143,42 @@ TEST(StringUtilTest, Tokens) {
 
				     EXPECT_EQ(string("gamma"), result[3]);
			
 
				     EXPECT_EQ(string("delta"), result[4]);
			
 
				     EXPECT_EQ(string("epsilon"), result[5]);
			
 
				+
			
 
				+    // Escaped delimiter
			
 
				+    result = isc::util::str::tokens("foo\\,bar", ",", true);
			
 
				+    EXPECT_EQ(1, result.size());
			
 
				+    EXPECT_EQ(string("foo,bar"), result[0]);
			
 
				+
			
 
				+    // Escaped escape
			
 
				+    result = isc::util::str::tokens("foo\\\\,bar", ",", true);
			
 
				+    ASSERT_EQ(2, result.size());
			
 
				+    EXPECT_EQ(string("foo\\"), result[0]);
			
 
				+    EXPECT_EQ(string("bar"), result[1]);
			
 
				+
			
 
				+    // Double escapes
			
 
				+    result = isc::util::str::tokens("foo\\\\\\\\,\\bar", ",", true);
			
 
				+    ASSERT_EQ(2, result.size());
			
 
				+    EXPECT_EQ(string("foo\\\\"), result[0]);
			
 
				+    EXPECT_EQ(string("\\bar"), result[1]);
			
 
				+
			
 
				+    // Escaped standard character
			
 
				+    result = isc::util::str::tokens("fo\\o,bar", ",", true);
			
 
				+    ASSERT_EQ(2, result.size());
			
 
				+    EXPECT_EQ(string("fo\\o"), result[0]);
			
 
				+    EXPECT_EQ(string("bar"), result[1]);
			
 
				+
			
 
				+    // Escape at the end
			
 
				+    result = isc::util::str::tokens("foo,bar\\", ",", true);
			
 
				+    ASSERT_EQ(2, result.size());
			
 
				+    EXPECT_EQ(string("foo"), result[0]);
			
 
				+    EXPECT_EQ(string("bar\\"), result[1]);
			
 
				+
			
 
				+    // Escape opening a token
			
 
				+    result = isc::util::str::tokens("foo,\\,,bar", ",", true);
			
 
				+    ASSERT_EQ(3, result.size());
			
 
				+    EXPECT_EQ(string("foo"), result[0]);
			
 
				+    EXPECT_EQ(string(","), result[1]);
			
 
				+    EXPECT_EQ(string("bar"), result[2]);
			
 
				 }
			
 
				 
			
 
				 // Changing case