strutil.h 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. // Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this
  5. // file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6. #ifndef STRUTIL_H
  7. #define STRUTIL_H
  8. #include <algorithm>
  9. #include <cctype>
  10. #include <stdint.h>
  11. #include <string>
  12. #include <sstream>
  13. #include <vector>
  14. #include <exceptions/exceptions.h>
  15. #include <boost/lexical_cast.hpp>
  16. namespace isc {
  17. namespace util {
  18. namespace str {
// A set of C++ utilities for manipulating strings.
/// \brief A standard string util exception that is thrown if getToken() or
/// tokenToNum() are called with bad input data.
///
/// tokenToNum() raises it (through isc_throw) both when the token cannot be
/// converted to a number and when the converted value is out of range.
class StringTokenError : public Exception {
public:
    /// \brief Constructor; forwards all three arguments unchanged to the
    /// isc::Exception base class.
    ///
    /// \param file Presumably the name of the source file raising the
    ///        exception (as supplied by isc_throw) -- confirm against
    ///        exceptions/exceptions.h.
    /// \param line Presumably the line number within that file.
    /// \param what Text describing the error.
    StringTokenError(const char* file, size_t line, const char* what) :
        isc::Exception(file, line, what) {}
};
/// \brief Normalize Backslash
///
/// Only relevant to Windows: this replaces every "\" in a string with "/".
/// On other systems it is a no-op. Note that Windows does recognize file
/// names with the "\" replaced by "/" (at least in system calls, if not on
/// the command line).
///
/// The function returns void and takes a non-const reference, so the
/// substitution is presumably applied to \c name in place (the definition
/// is not part of this header -- confirm there).
///
/// \param name Name in which the substitution is performed
void normalizeSlash(std::string& name);
/// \brief Trim Leading and Trailing Spaces
///
/// Returns a copy of the input string but with any leading or trailing spaces
/// or tabs removed.
///
/// \param instring Input string; it is taken by const reference, so the
///        caller's string is left unchanged
///
/// \return Copy of the input with leading and trailing spaces and tabs removed
std::string trim(const std::string& instring);
/// \brief Split String into Tokens
///
/// Splits a string into tokens (the tokens being delimited by one or more of
/// the delimiter characters) and returns the tokens in a vector. Note that
/// adjacent delimiters are considered to be a single delimiter.
///
/// Special cases are:
/// -# The empty string is considered to be zero tokens.
/// -# A string comprising nothing but delimiters is considered to be zero
///    tokens.
///
/// The reasoning behind this is that the string can be thought of as having
/// invisible leading and trailing delimiter characters. Therefore both cases
/// reduce to a set of contiguous delimiters, which are considered a single
/// delimiter (so getting rid of the string).
///
/// When \c escape is true, a backslash can be used to escape delimiter
/// characters (and *only* them and the escape character itself).
///
/// We could use Boost for this, but this (simple) function eliminates one
/// dependency in the code.
///
/// \param text String to be split. It is taken by const reference, so the
///        caller's string is not modified.
/// \param delim Delimiter characters; defaults to space, tab and newline.
/// \param escape Use backslash to escape delimiter characters; defaults to
///        false.
///
/// \return Vector of tokens.
std::vector<std::string> tokens(const std::string& text,
                                const std::string& delim = std::string(" \t\n"),
                                bool escape = false);
  77. /// \brief Uppercase Character
  78. ///
  79. /// Used in uppercase() to pass as an argument to std::transform(). The
  80. /// function std::toupper() can't be used as it takes an "int" as its argument;
  81. /// this confuses the template expansion mechanism because dereferencing a
  82. /// string::iterator returns a char.
  83. ///
  84. /// \param chr Character to be upper-cased.
  85. ///
  86. /// \return Uppercase version of the argument
  87. inline char toUpper(char chr) {
  88. return (static_cast<char>(std::toupper(static_cast<int>(chr))));
  89. }
  90. /// \brief Uppercase String
  91. ///
  92. /// A convenience function to uppercase a string.
  93. ///
  94. /// \param text String to be upper-cased.
  95. inline void uppercase(std::string& text) {
  96. std::transform(text.begin(), text.end(), text.begin(),
  97. isc::util::str::toUpper);
  98. }
  99. /// \brief Lowercase Character
  100. ///
  101. /// Used in lowercase() to pass as an argument to std::transform(). The
  102. /// function std::tolower() can't be used as it takes an "int" as its argument;
  103. /// this confuses the template expansion mechanism because dereferencing a
  104. /// string::iterator returns a char.
  105. ///
  106. /// \param chr Character to be lower-cased.
  107. ///
  108. /// \return Lowercase version of the argument
  109. inline char toLower(char chr) {
  110. return (static_cast<char>(std::tolower(static_cast<int>(chr))));
  111. }
  112. /// \brief Lowercase String
  113. ///
  114. /// A convenience function to lowercase a string
  115. ///
  116. /// \param text String to be lower-cased.
  117. inline void lowercase(std::string& text) {
  118. std::transform(text.begin(), text.end(), text.begin(),
  119. isc::util::str::toLower);
  120. }
/// \brief Apply Formatting
///
/// Given a printf-style format string containing only "%s" placeholders
/// (others are ignored) and a vector of strings, this produces a single string
/// with the placeholders replaced.
///
/// What happens when the number of placeholders differs from the number of
/// arguments is not specified in this header; check the definition before
/// relying on either case.
///
/// \param format Format string
/// \param args Vector of argument strings
///
/// \return Resultant string
std::string format(const std::string& format,
                   const std::vector<std::string>& args);
/// \brief Returns one token from the given stringstream
///
/// The token is extracted using the >> operator, with basic error checking.
/// The stream is taken by non-const reference, so the extraction presumably
/// advances it past the token that was read.
///
/// \exception StringTokenError if the token cannot be read from the stream
///
/// \param iss stringstream to read one token from
///
/// \return the first token read from the stringstream
std::string getToken(std::istringstream& iss);
  143. /// \brief Converts a string token to an *unsigned* integer.
  144. ///
  145. /// The value is converted using a lexical cast, with error and bounds
  146. /// checking.
  147. ///
  148. /// NumType is a *signed* integral type (e.g. int32_t) that is sufficiently
  149. /// wide to store resulting integers.
  150. ///
  151. /// BitSize is the maximum number of bits that the resulting integer can take.
  152. /// This function first checks whether the given token can be converted to
  153. /// an integer of NumType type. It then confirms the conversion result is
  154. /// within the valid range, i.e., [0, 2^BitSize - 1]. The second check is
  155. /// necessary because lexical_cast<T> where T is an unsigned integer type
  156. /// doesn't correctly reject negative numbers when compiled with SunStudio.
  157. ///
  158. /// \exception StringTokenError if the value is out of range, or if it
  159. /// could not be converted
  160. ///
  161. /// \param num_token the string token to convert
  162. ///
  163. /// \return the converted value, of type NumType
  164. template <typename NumType, int BitSize>
  165. NumType
  166. tokenToNum(const std::string& num_token) {
  167. NumType num;
  168. try {
  169. num = boost::lexical_cast<NumType>(num_token);
  170. } catch (const boost::bad_lexical_cast&) {
  171. isc_throw(StringTokenError, "Invalid SRV numeric parameter: " <<
  172. num_token);
  173. }
  174. if (num < 0 || num >= (static_cast<NumType>(1) << BitSize)) {
  175. isc_throw(StringTokenError, "Numeric SRV parameter out of range: " <<
  176. num);
  177. }
  178. return (num);
  179. }
/// \brief Converts a string in quotes into vector.
///
/// The input string is first trimmed. If the trimmed string is in
/// quotes, the quotes are removed and the resulting string is copied
/// into a vector, which is returned. If the string is not in quotes, an
/// empty vector is returned.
///
/// This function is intended to be used by the server configuration
/// parsers to convert string values surrounded with quotes into
/// binary form.
///
/// \param quoted_string String to be converted.
/// \return Vector containing the converted string, or an empty vector if
/// the input string didn't contain the expected quote characters.
std::vector<uint8_t>
quotedStringToBinary(const std::string& quoted_string);
/// \brief Converts a string of hexadecimal digits with colons into
/// a vector.
///
/// This function supports the following formats:
/// - yy:yy:yy:yy:yy
/// - y:y:y:y:y
/// - y:yy:yy:y:y
///
/// If the input string doesn't match any of the supported formats,
/// an exception is thrown.
///
/// \param hex_string Input string.
/// \param binary Vector receiving the binary form of the input string.
/// \throw isc::BadValue if the format of the input string is invalid.
void
decodeColonSeparatedHexString(const std::string& hex_string,
                              std::vector<uint8_t>& binary);
/// \brief Converts a formatted string of hexadecimal digits into
/// a vector.
///
/// This function supports the formats supported by
/// @ref decodeColonSeparatedHexString and the following additional
/// formats:
/// - yyyyyyyyyy
/// - 0xyyyyyyyyyy
///
/// If there is an odd number of hexadecimal digits in the input
/// string, a '0' is prepended to the string before decoding.
///
/// \param hex_string Input string.
/// \param binary Vector receiving the binary form of the input string.
/// \throw isc::BadValue if the format of the input string is invalid.
void
decodeFormattedHexString(const std::string& hex_string,
                         std::vector<uint8_t>& binary);
  233. } // namespace str
  234. } // namespace util
  235. } // namespace isc
  236. #endif // STRUTIL_H