request_parser.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. // Copyright (C) 2016 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this
  5. // file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6. #ifndef HTTP_REQUEST_PARSER_H
  7. #define HTTP_REQUEST_PARSER_H
  8. #include <exceptions/exceptions.h>
  9. #include <http/request.h>
  10. #include <util/state_model.h>
  11. #include <boost/function.hpp>
  12. #include <list>
  13. #include <stdint.h>
  14. #include <string>
  15. namespace isc {
  16. namespace http {
  17. /// @brief Exception reported when the parser fails to process an HTTP
  18. /// request.
  19. ///
  20. /// Malformed requests are the most common cause of this error.
  21. class HttpRequestParserError : public isc::Exception {
  22. public:
  23. HttpRequestParserError(const char* file, size_t line, const char* what)
  24. : isc::Exception(file, line, what) {} // arguments are forwarded unchanged
  25. };
  26. /// @brief A generic parser for HTTP requests.
  27. ///
  28. /// This class implements a parser for HTTP requests. The parser derives from
  29. /// @ref isc::util::StateModel class and implements its own state machine on
  30. /// top of it. The states of the parser reflect various parts of the HTTP
  31. /// message being parsed, e.g. parsing HTTP method, parsing URI, parsing
  32. /// message body etc. The descriptions of all parser states are provided
  33. /// below together with the constants defining these states.
  34. ///
  35. /// HTTP uses TCP as a transport which is asynchronous in nature, i.e. the
  36. /// HTTP message is received in chunks and multiple TCP connections can be
  37. /// established at the same time. Multiplexing between these connections
  38. /// requires providing a separate state machine per connection to "remember"
  39. /// the state of each transaction when the parser is waiting for asynchronous
  40. /// data to be delivered. While the parser is waiting for the data, it can
  41. /// parse requests received over other connections. This class provides means
  42. /// for parsing partial data received over the specific connection and
  43. /// interrupting data parsing to switch to a different context.
  44. ///
  45. /// The request parser validates the syntax of the received message as it
  46. /// progresses with parsing the data. Though, it doesn't interpret the received
  47. /// data until the whole message is parsed. In most cases we want to apply some
  48. /// restrictions on the message content, e.g. Kea Control API requires that
  49. /// commands are sent using HTTP POST, with a JSON command being carried in a
  50. /// message body. The parser doesn't verify if the message meets these
  51. /// restrictions until the whole message is parsed, i.e. stored in the
  52. /// @ref HttpRequestContext object. This object is associated with a
  53. /// @ref HttpRequest object (or its derivation). When the parsing is completed,
  54. /// the @ref HttpRequest::create method is called to retrieve the data from
  55. /// the @ref HttpRequestContext and interpret the data. In particular, the
  56. /// @ref HttpRequest or its derivation checks if the received message meets
  57. /// desired restrictions.
  58. ///
  59. /// Kea Control API uses @ref PostHttpRequestJson class (which derives from the
  60. /// @ref HttpRequest) to interpret received request. This class requires
  61. /// that the HTTP request uses POST method and contains the following headers:
  62. /// - Content-Type: application/json,
  63. /// - Content-Length
  64. ///
  65. /// If any of these restrictions is not met in the received message, an
  66. /// exception will be thrown, thereby @ref HttpRequestParser will fail parsing
  67. /// the message.
  68. ///
  69. /// A new method @ref HttpRequestParser::poll has been created to run the
  70. /// parser's state machine as long as there are unparsed data in the parser's
  71. /// internal buffer. This method returns control to the caller when the parser
  72. /// runs out of data in this buffer. The caller must feed the buffer by calling
  73. /// @ref HttpRequestParser::postBuffer and then run @ref HttpRequestParser::poll
  74. /// again.
  75. ///
  76. /// In case the caller provides more data than indicated by the "Content-Length"
  77. /// header the parser will return from poll() after parsing the data which
  78. /// constitute the HTTP request and not parse the extraneous data. The caller
  79. /// should test the @ref HttpRequestParser::needData and
  80. /// @ref HttpRequestParser::httpParseOk to determine whether parsing has
  81. /// completed.
  82. ///
  83. /// The @ref util::StateModel::runModel must not be used to run the
  84. /// @ref HttpRequestParser state machine, thus it is made a private method.
  85. class HttpRequestParser : public util::StateModel {
  86. public:
  87. /// @name States supported by the HttpRequestParser.
  88. ///
  89. //@{
  90. /// @brief State indicating a beginning of parsing.
  91. static const int RECEIVE_START_ST = SM_DERIVED_STATE_MIN + 1;
  92. /// @brief Parsing HTTP method, e.g. GET, POST etc.
  93. static const int HTTP_METHOD_ST = SM_DERIVED_STATE_MIN + 2;
  94. /// @brief Parsing URI.
  95. static const int HTTP_URI_ST = SM_DERIVED_STATE_MIN + 3;
  96. /// @brief Parsing letter "H" of "HTTP".
  97. static const int HTTP_VERSION_H_ST = SM_DERIVED_STATE_MIN + 4;
  98. /// @brief Parsing first occurrence of "T" in "HTTP".
  99. static const int HTTP_VERSION_T1_ST = SM_DERIVED_STATE_MIN + 5;
  100. /// @brief Parsing second occurrence of "T" in "HTTP".
  101. static const int HTTP_VERSION_T2_ST = SM_DERIVED_STATE_MIN + 6;
  102. /// @brief Parsing letter "P" in "HTTP".
  103. static const int HTTP_VERSION_P_ST = SM_DERIVED_STATE_MIN + 7;
  104. /// @brief Parsing slash character in "HTTP/Y.X"
  105. static const int HTTP_VERSION_SLASH_ST = SM_DERIVED_STATE_MIN + 8;
  106. /// @brief Starting to parse major HTTP version number.
  107. static const int HTTP_VERSION_MAJOR_START_ST = SM_DERIVED_STATE_MIN + 9;
  108. /// @brief Parsing major HTTP version number.
  109. static const int HTTP_VERSION_MAJOR_ST = SM_DERIVED_STATE_MIN + 10;
  110. /// @brief Starting to parse minor HTTP version number.
  111. static const int HTTP_VERSION_MINOR_START_ST = SM_DERIVED_STATE_MIN + 11;
  112. /// @brief Parsing minor HTTP version number.
  113. static const int HTTP_VERSION_MINOR_ST = SM_DERIVED_STATE_MIN + 12;
  114. /// @brief Parsing first new line (after HTTP version number).
  115. static const int EXPECTING_NEW_LINE1_ST = SM_DERIVED_STATE_MIN + 13;
  116. /// @brief Starting to parse a header line.
  117. static const int HEADER_LINE_START_ST = SM_DERIVED_STATE_MIN + 14;
  118. /// @brief Parsing LWS (Linear White Space), i.e. new line with a space
  119. /// or tab character while parsing a HTTP header.
  120. static const int HEADER_LWS_ST = SM_DERIVED_STATE_MIN + 15;
  121. /// @brief Parsing header name.
  122. static const int HEADER_NAME_ST = SM_DERIVED_STATE_MIN + 16;
  123. /// @brief Parsing space before header value.
  124. static const int SPACE_BEFORE_HEADER_VALUE_ST = SM_DERIVED_STATE_MIN + 17;
  125. /// @brief Parsing header value.
  126. static const int HEADER_VALUE_ST = SM_DERIVED_STATE_MIN + 18;
  127. /// @brief Expecting new line after parsing header value.
  128. static const int EXPECTING_NEW_LINE2_ST = SM_DERIVED_STATE_MIN + 19;
  129. /// @brief Expecting second new line marking end of HTTP headers.
  130. static const int EXPECTING_NEW_LINE3_ST = SM_DERIVED_STATE_MIN + 20;
  131. /// @brief Parsing body of a HTTP message.
  132. static const int HTTP_BODY_ST = SM_DERIVED_STATE_MIN + 21;
  133. /// @brief Parsing successfully completed.
  134. static const int HTTP_PARSE_OK_ST = SM_DERIVED_STATE_MIN + 100;
  135. /// @brief Parsing failed.
  136. static const int HTTP_PARSE_FAILED_ST = SM_DERIVED_STATE_MIN + 101;
  137. //@}
  138. /// @name Events used during HTTP message parsing.
  139. ///
  140. //@{
  141. /// @brief Chunk of data successfully read and parsed.
  142. static const int DATA_READ_OK_EVT = SM_DERIVED_EVENT_MIN + 1;
  143. /// @brief Unable to proceed with parsing until new data is provided.
  144. static const int NEED_MORE_DATA_EVT = SM_DERIVED_EVENT_MIN + 2;
  145. /// @brief New data provided and parsing should continue.
  146. static const int MORE_DATA_PROVIDED_EVT = SM_DERIVED_EVENT_MIN + 3;
  147. /// @brief Parsing HTTP request successful.
  148. static const int HTTP_PARSE_OK_EVT = SM_DERIVED_EVENT_MIN + 100;
  149. /// @brief Parsing HTTP request failed.
  150. static const int HTTP_PARSE_FAILED_EVT = SM_DERIVED_EVENT_MIN + 101;
  151. //@}
  152. /// @brief Constructor.
  153. ///
  154. /// Creates new instance of the parser.
  155. ///
  156. /// @param request Reference to the @ref HttpRequest object or its
  157. /// derivation that should be used to validate the parsed request and
  158. /// to be used as a container for the parsed request.
  159. HttpRequestParser(HttpRequest& request);
  160. /// @brief Initialize the state model for parsing.
  161. ///
  162. /// This method must be called before parsing the request, i.e. before
  163. /// calling @ref HttpRequestParser::poll. It initializes dictionaries of
  164. /// states and events, and sets the initial model state to RECEIVE_START_ST.
  165. void initModel();
  166. /// @brief Run the parser as long as the amount of data is sufficient.
  167. ///
  168. /// The data to be parsed should be provided by calling
  169. /// @ref HttpRequestParser::postBuffer. When the parser reaches the end of
  170. /// the data buffer the @ref HttpRequestParser::poll sets the next event to
  171. /// @ref NEED_MORE_DATA_EVT and returns. The caller should then invoke
  172. /// @ref HttpRequestParser::postBuffer again to provide more data to the
  173. /// parser, and call @ref HttpRequestParser::poll to continue parsing.
  174. ///
  175. /// This method also returns when parsing completes or fails. The last
  176. /// event can be examined to check whether parsing was successful or not.
  177. void poll();
  178. /// @brief Returns true if the parser needs more data to continue.
  179. ///
  180. /// @return true if the next event is NEED_MORE_DATA_EVT.
  181. bool needData() const;
  182. /// @brief Returns true if a request has been parsed successfully.
  183. bool httpParseOk() const;
  184. /// @brief Returns a copy of the error message stored in @c error_message_.
  185. std::string getErrorMessage() const {
  186. return (error_message_);
  187. }
  188. /// @brief Provides more input data to the parser.
  189. ///
  190. /// This method must be called prior to calling @ref HttpRequestParser::poll
  191. /// to deliver data to be parsed. HTTP requests are received over TCP and
  192. /// multiple reads may be necessary to retrieve the entire request. There is
  193. /// no need to accumulate the entire request to start parsing it. A chunk
  194. /// of data can be provided to the parser using this method and parsed right
  195. /// away using @ref HttpRequestParser::poll.
  196. ///
  197. /// @param buf A pointer to the buffer holding the data.
  198. /// @param buf_size Size of the data within the buffer.
  199. void postBuffer(const void* buf, const size_t buf_size);
  200. private:
  201. /// @brief Make @ref runModel private to make sure that the caller uses
  202. /// @ref poll method instead.
  203. using StateModel::runModel;
  204. /// @brief Define events used by the parser.
  205. virtual void defineEvents();
  206. /// @brief Verifies events used by the parser.
  207. virtual void verifyEvents();
  208. /// @brief Defines states of the parser.
  209. virtual void defineStates();
  210. /// @brief Transition parser to failure state.
  211. ///
  212. /// This method transitions the parser to @ref HTTP_PARSE_FAILED_ST and
  213. /// sets next event to HTTP_PARSE_FAILED_EVT.
  214. ///
  215. /// @param error_msg Error message explaining the failure.
  216. void parseFailure(const std::string& error_msg);
  217. /// @brief A method called when parsing fails.
  218. ///
  219. /// @param explanation Error message explaining the reason for parsing
  220. /// failure.
  221. virtual void onModelFailure(const std::string& explanation);
  222. /// @brief Retrieves next byte of data from the buffer.
  223. ///
  224. /// During normal operation, when there is no more data in the buffer,
  225. /// the parser sets NEED_MORE_DATA_EVT as next event to signal the need for
  226. /// calling @ref HttpRequestParser::postBuffer.
  227. ///
  228. /// @throw HttpRequestParserError If current event is already set to
  229. /// NEED_MORE_DATA_EVT or MORE_DATA_PROVIDED_EVT. In the former case, it
  230. /// indicates that the caller failed to provide new data using
  231. /// @ref HttpRequestParser::postBuffer. The latter case is highly unlikely
  232. /// as it indicates that no new data were provided but the state of the
  233. /// parser was changed from NEED_MORE_DATA_EVT or the data were provided
  234. /// but the data buffer is empty. In both cases, it is an internal server
  235. /// error.
  236. char getNextFromBuffer();
  237. /// @brief This method is called when invalid event occurred in a particular
  238. /// parser state.
  239. ///
  240. /// This method simply throws @ref HttpRequestParserError informing about
  241. /// invalid event occurring for the particular parser state. The error
  242. /// message includes the name of the handler in which the exception
  243. /// has been thrown. It also includes the event which caused the
  244. /// exception.
  245. ///
  246. /// @param handler_name Name of the handler in which the exception is
  247. /// thrown.
  248. /// @param event An event which caused the exception.
  249. ///
  250. /// @throw HttpRequestParserError.
  251. void invalidEventError(const std::string& handler_name,
  252. const unsigned int event);
  253. /// @brief Generic parser handler which reads a single byte of data and
  254. /// parses it using specified callback function.
  255. ///
  256. /// This generic handler is used in most of the parser states to parse a
  257. /// single byte of input data. If there is no more data it simply returns.
  258. /// Otherwise, if the next event is DATA_READ_OK_EVT or
  259. /// MORE_DATA_PROVIDED_EVT, it calls the provided callback function to
  260. /// parse the new byte of data. For all other events it throws an exception.
  261. ///
  262. /// @param handler_name Name of the handler function which called this
  263. /// method.
  264. /// @param after_read_logic Callback function to parse the byte of data.
  265. /// This callback function implements state specific logic.
  266. ///
  267. /// @throw HttpRequestParserError when invalid event occurred.
  268. void stateWithReadHandler(const std::string& handler_name,
  269. boost::function<void(const char c)>
  270. after_read_logic);
  271. /// @name State handlers.
  272. ///
  273. //@{
  274. /// @brief Handler for RECEIVE_START_ST.
  275. void receiveStartHandler();
  276. /// @brief Handler for HTTP_METHOD_ST.
  277. void httpMethodHandler();
  278. /// @brief Handler for HTTP_URI_ST.
  279. void uriHandler();
  280. /// @brief Handler for states parsing "HTTP" string within the first line
  281. /// of the HTTP request.
  282. ///
  283. /// @param expected_letter One of the 'H', 'T', 'P'.
  284. /// @param next_state A state to which the parser should transition after
  285. /// parsing the character.
  286. void versionHTTPHandler(const char expected_letter,
  287. const unsigned int next_state);
  288. /// @brief Handler for HTTP_VERSION_MAJOR_START_ST and
  289. /// HTTP_VERSION_MINOR_START_ST.
  290. ///
  291. /// This handler calculates version number using the following equation:
  292. /// @code
  293. /// storage = storage * 10 + c - '0';
  294. /// @endcode
  295. ///
  296. /// @param next_state State to which the parser should transition.
  297. /// @param [out] storage Pointer to a number holding current product of
  298. /// parsing major or minor version number.
  299. void versionNumberStartHandler(const unsigned int next_state,
  300. unsigned int* storage);
  301. /// @brief Handler for HTTP_VERSION_MAJOR_ST and HTTP_VERSION_MINOR_ST.
  302. ///
  303. /// This handler calculates version number using the following equation:
  304. /// @code
  305. /// storage = storage * 10 + c - '0';
  306. /// @endcode
  307. ///
  308. /// @param following_character Character following the version number, i.e.
  309. /// '.' for major version, \r for minor version.
  310. /// @param next_state State to which the parser should transition.
  311. /// @param [out] storage Pointer to a number holding current product of
  312. /// parsing major or minor version number.
  313. void versionNumberHandler(const char following_character,
  314. const unsigned int next_state,
  315. unsigned int* const storage);
  316. /// @brief Handler for states related to new lines.
  317. ///
  318. /// If the next_state is HTTP_PARSE_OK_ST it indicates that the parsed
  319. /// value is a 3rd new line within request HTTP message. In this case the
  320. /// handler calls @ref HttpRequest::create to validate the received message
  321. /// (excluding body). The handler then reads the "Content-Length" header to
  322. /// check if the request contains a body. If the "Content-Length" is greater
  323. /// than zero, the parser transitions to HTTP_BODY_ST. If the
  324. /// "Content-Length" doesn't exist the parser transitions to
  325. /// HTTP_PARSE_OK_ST.
  326. ///
  327. /// @param next_state A state to which parser should transition.
  328. void expectingNewLineHandler(const unsigned int next_state);
  329. /// @brief Handler for HEADER_LINE_START_ST.
  330. void headerLineStartHandler();
  331. /// @brief Handler for HEADER_LWS_ST.
  332. void headerLwsHandler();
  333. /// @brief Handler for HEADER_NAME_ST.
  334. void headerNameHandler();
  335. /// @brief Handler for SPACE_BEFORE_HEADER_VALUE_ST.
  336. void spaceBeforeHeaderValueHandler();
  337. /// @brief Handler for HEADER_VALUE_ST.
  338. void headerValueHandler();
  339. /// @brief Handler for HTTP_BODY_ST.
  340. void bodyHandler();
  341. /// @brief Handler for HTTP_PARSE_OK_ST and HTTP_PARSE_FAILED_ST.
  342. ///
  343. /// If parsing is successful, it calls @ref HttpRequest::create to validate
  344. /// the HTTP request. In both cases it transitions the parser to the END_ST.
  345. void parseEndedHandler();
  346. /// @brief Tries to read next byte from buffer.
  347. ///
  348. /// @param [out] next A reference to the variable where read data should be
  349. /// stored.
  350. ///
  351. /// @return true if character was successfully read, false otherwise.
  352. bool popNextFromBuffer(char& next);
  353. /// @brief Checks if specified value is a character.
  354. ///
  355. /// @return true, if specified value is a character.
  356. bool isChar(const char c) const;
  357. /// @brief Checks if specified value is a control value.
  358. ///
  359. /// @return true, if specified value is a control value.
  360. bool isCtl(const char c) const;
  361. /// @brief Checks if specified value is a special character.
  362. ///
  363. /// @return true, if specified value is a special character.
  364. bool isSpecial(const char c) const;
  365. /// @brief Internal buffer from which parser reads data.
  366. std::list<char> buffer_;
  367. /// @brief Reference to the request object specified in the constructor.
  368. HttpRequest& request_;
  369. /// @brief Pointer to the internal context of the @ref HttpRequest object.
  370. HttpRequestContextPtr context_;
  371. /// @brief Error message set by @ref onModelFailure.
  372. std::string error_message_;
  373. };
  374. } // namespace http
  375. } // namespace isc
  376. #endif // HTTP_REQUEST_PARSER_H