// master_lexer.h
  1. // Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // Permission to use, copy, modify, and/or distribute this software for any
  4. // purpose with or without fee is hereby granted, provided that the above
  5. // copyright notice and this permission notice appear in all copies.
  6. //
  7. // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  8. // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  9. // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  10. // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  11. // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  12. // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  13. // PERFORMANCE OF THIS SOFTWARE.
  14. #ifndef MASTER_LEXER_H
  15. #define MASTER_LEXER_H 1
  16. #include <dns/exceptions.h>
  17. #include <istream>
  18. #include <string>
  19. #include <stdint.h>
  20. #include <boost/noncopyable.hpp>
  21. namespace isc {
  22. namespace dns {
  23. namespace master_lexer_internal {
  24. class State;
  25. }
  26. /// \brief Tokens for \c MasterLexer
  27. ///
  28. /// This is a simple value-class encapsulating a type of a lexer token and
  29. /// (if it has a value) its value. Essentially, the class provides
  30. /// constructors corresponding to different types of tokens, and corresponding
  31. /// getter methods. The type and value are fixed at the time of construction
  32. /// and will never be modified throughout the lifetime of the object.
  33. /// The getter methods are still provided to maximize the safety; an
  34. /// application cannot refer to a value that is invalid for the type of token.
  35. ///
  36. /// This class is intentionally implemented as copyable and assignable
  37. /// (using the default version of copy constructor and assignment operator),
  38. /// but it's mainly for internal implementation convenience. Applications will
  39. /// simply refer to Token object as a reference via the \c MasterLexer class.
  40. class MasterToken {
  41. public:
  42. /// \brief Enumeration for token types
  43. ///
  44. /// \note At the time of initial implementation, all numeric tokens
  45. /// that would be extracted from \c MasterLexer should be represented
  46. /// as an unsigned 32-bit integer. If we see the need for larger integers
  47. /// or negative numbers, we can then extend the token types.
  48. enum Type {
  49. END_OF_LINE, ///< End of line detected
  50. END_OF_FILE, ///< End of file detected
  51. INITIAL_WS, ///< White spaces at the beginning of a line after an
  52. ///< end of line or at the beginning of file (if asked
  53. // for detecting it)
  54. NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
  55. /// no-value (type only) types.
  56. /// Mainly for internal use.
  57. STRING, ///< A single string
  58. QSTRING, ///< A single string quoted by double-quotes (").
  59. NUMBER, ///< A decimal number (unsigned 32-bit)
  60. ERROR ///< Error detected in getting a token
  61. };
  62. /// \brief Enumeration for lexer error codes
  63. enum ErrorCode {
  64. NOT_STARTED, ///< The lexer is just initialized and has no token
  65. UNBALANCED_PAREN, ///< Unbalanced parentheses detected
  66. UNEXPECTED_END, ///< The lexer reaches the end of line or file
  67. /// unexpectedly
  68. UNBALANCED_QUOTES, ///< Unbalanced quotations detected
  69. NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
  70. /// error and should never get out of the lexer.
  71. NUMBER_OUT_OF_RANGE, ///< Number was out of range
  72. BAD_NUMBER, ///< Number is expected but not recognized
  73. MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
  74. /// (excluding this one). Mainly for internal use.
  75. };
  76. /// \brief A simple representation of a range of a string.
  77. ///
  78. /// This is a straightforward pair of the start pointer of a string
  79. /// and its length. The \c STRING and \c QSTRING types of tokens
  80. /// will be primarily represented in this form.
  81. ///
  82. /// Any character can be stored in the valid range of the region.
  83. /// In particular, there can be a nul character (\0) in the middle of
  84. /// the region. So the usual string manipulation API may not work
  85. /// as expected.
  86. ///
  87. /// The `MasterLexer` implementation ensures that there are at least
  88. /// len + 1 bytes of valid memory region starting from beg, and that
  89. /// beg[len] is \0. This means the application can use the bytes as a
  90. /// validly nul-terminated C string if there is no intermediate nul
  91. /// character. Note also that due to this property beg is always non
  92. /// NULL; for an empty string len will be set to 0 and beg[0] is \0.
  93. struct StringRegion {
  94. const char* beg; ///< The start address of the string
  95. size_t len; ///< The length of the string in bytes
  96. };
  97. /// \brief Constructor for non-value type of token.
  98. ///
  99. /// \throw InvalidParameter A value type token is specified.
  100. /// \param type The type of the token. It must indicate a non-value
  101. /// type (not larger than \c NOVALUE_TYPE_MAX).
  102. explicit MasterToken(Type type) : type_(type) {
  103. if (type > NOVALUE_TYPE_MAX) {
  104. isc_throw(InvalidParameter, "Token per-type constructor "
  105. "called with invalid type: " << type);
  106. }
  107. }
  108. /// \brief Constructor for string and quoted-string types of token.
  109. ///
  110. /// The optional \c quoted parameter specifies whether it's a quoted or
  111. /// non quoted string.
  112. ///
  113. /// The string is specified as a pair of a pointer to the start address
  114. /// and its length. Any character can be contained in any position of
  115. /// the valid range (see \c StringRegion).
  116. ///
  117. /// When it's a quoted string, the quotation marks must be excluded
  118. /// from the specified range.
  119. ///
  120. /// \param str_beg The start address of the string
  121. /// \param str_len The size of the string in bytes
  122. /// \param quoted true if it's a quoted string; false otherwise.
  123. MasterToken(const char* str_beg, size_t str_len, bool quoted = false) :
  124. type_(quoted ? QSTRING : STRING)
  125. {
  126. val_.str_region_.beg = str_beg;
  127. val_.str_region_.len = str_len;
  128. }
  129. /// \brief Constructor for number type of token.
  130. ///
  131. /// \brief number An unsigned 32-bit integer corresponding to the token
  132. /// value.
  133. explicit MasterToken(uint32_t number) : type_(NUMBER) {
  134. val_.number_ = number;
  135. }
  136. /// \brief Constructor for error type of token.
  137. ///
  138. /// \throw InvalidParameter Invalid error code value is specified.
  139. /// \brief error_code A pre-defined constant of \c ErrorCode.
  140. explicit MasterToken(ErrorCode error_code) : type_(ERROR) {
  141. if (!(error_code < MAX_ERROR_CODE)) {
  142. isc_throw(InvalidParameter, "Invalid master lexer error code: "
  143. << error_code);
  144. }
  145. val_.error_code_ = error_code;
  146. }
  147. /// \brief Return the token type.
  148. ///
  149. /// \throw none
  150. Type getType() const { return (type_); }
  151. /// \brief Return the value of a string-variant token.
  152. ///
  153. /// \throw InvalidOperation Called on a non string-variant types of token.
  154. /// \return A reference to \c StringRegion corresponding to the string
  155. /// token value.
  156. const StringRegion& getStringRegion() const {
  157. if (type_ != STRING && type_ != QSTRING) {
  158. isc_throw(InvalidOperation,
  159. "Token::getStringRegion() for non string-variant type");
  160. }
  161. return (val_.str_region_);
  162. }
  163. /// \brief Return the value of a string-variant token as a string object.
  164. ///
  165. /// Note that the underlying string may contain a nul (\0) character
  166. /// in the middle. The returned string object will contain all characters
  167. /// of the valid range of the underlying string. So some string
  168. /// operations such as c_str() may not work as expected.
  169. ///
  170. /// \throw InvalidOperation Called on a non string-variant types of token.
  171. /// \throw std::bad_alloc Resource allocation failure in constructing the
  172. /// string object.
  173. /// \return A std::string object corresponding to the string token value.
  174. std::string getString() const {
  175. std::string ret;
  176. getString(ret);
  177. return (ret);
  178. }
  179. /// \brief Fill in a string with the value of a string-variant token.
  180. ///
  181. /// This is similar to the other version of \c getString(), but
  182. /// the caller is supposed to pass a placeholder string object.
  183. /// This will be more efficient if the caller uses the same
  184. /// \c MasterLexer repeatedly and needs to get string token in the
  185. /// form of a string object many times as this version could reuse
  186. /// the existing internal storage of the passed string.
  187. ///
  188. /// Any existing content of the passed string will be removed.
  189. ///
  190. /// \throw InvalidOperation Called on a non string-variant types of token.
  191. /// \throw std::bad_alloc Resource allocation failure in constructing the
  192. /// string object.
  193. ///
  194. /// \param ret A string object to be filled with the token string.
  195. void getString(std::string& ret) const {
  196. if (type_ != STRING && type_ != QSTRING) {
  197. isc_throw(InvalidOperation,
  198. "Token::getString() for non string-variant type");
  199. }
  200. ret.assign(val_.str_region_.beg,
  201. val_.str_region_.beg + val_.str_region_.len);
  202. }
  203. /// \brief Return the value of a string-variant token as a string object.
  204. ///
  205. /// \throw InvalidOperation Called on a non number type of token.
  206. /// \return The integer corresponding to the number token value.
  207. uint32_t getNumber() const {
  208. if (type_ != NUMBER) {
  209. isc_throw(InvalidOperation,
  210. "Token::getNumber() for non number type");
  211. }
  212. return (val_.number_);
  213. }
  214. /// \brief Return the error code of a error type token.
  215. ///
  216. /// \throw InvalidOperation Called on a non error type of token.
  217. /// \return The error code of the token.
  218. ErrorCode getErrorCode() const {
  219. if (type_ != ERROR) {
  220. isc_throw(InvalidOperation,
  221. "Token::getErrorCode() for non error type");
  222. }
  223. return (val_.error_code_);
  224. };
  225. /// \brief Return a textual description of the error of a error type token.
  226. ///
  227. /// The returned string would be useful to produce a log message when
  228. /// a zone file parser encounters an error.
  229. ///
  230. /// \throw InvalidOperation Called on a non error type of token.
  231. /// \throw std::bad_alloc Resource allocation failure in constructing the
  232. /// string object.
  233. /// \return A string object that describes the meaning of the error.
  234. std::string getErrorText() const;
  235. private:
  236. Type type_; // this is not const so the class can be assignable
  237. // We use a union to represent different types of token values via the
  238. // unified Token class. The class integrity should ensure valid operation
  239. // on the union; getter methods should only refer to the member set at
  240. // the construction.
  241. union {
  242. StringRegion str_region_;
  243. uint32_t number_;
  244. ErrorCode error_code_;
  245. } val_;
  246. };
  247. /// \brief Tokenizer for parsing DNS master files.
  248. ///
  249. /// The \c MasterLexer class provides tokenize interfaces for parsing DNS
  250. /// master files. It understands some special rules of master files as
  251. /// defined in RFC 1035, such as comments, character escaping, and multi-line
  252. /// data, and provides the user application with the actual data in a
  253. /// more convenient form such as a std::string object.
  254. ///
  255. /// In order to support the $INCLUDE notation, this class is designed to be
  256. /// able to operate on multiple files or input streams in the nested way.
  257. /// The \c pushSource() and \c popSource() methods correspond to the push
  258. /// and pop operations.
  259. ///
  260. /// While this class is public, it is less likely to be used by normal
  261. /// applications; it's mainly expected to be used within this library,
  262. /// specifically by the \c MasterLoader class and \c Rdata implementation
  263. /// classes.
  264. ///
  265. /// \note The error handling policy of this class is slightly different from
  266. /// that of other classes of this library. We generally throw an exception
  267. /// for an invalid input, whether it's more likely to be a program error or
  268. /// a "user error", which means an invalid input that comes from outside of
  269. /// the library. But, this class returns an error code for some certain
  270. /// types of user errors instead of throwing an exception. Such cases include
  271. /// a syntax error identified by the lexer or a misspelled file name that
  272. /// causes a system error at the time of open. This is based on the assumption
  273. /// that the main user of this class is a parser of master files, where
  274. /// we want to give an option to ignore some non fatal errors and continue
  275. /// the parsing. This will be useful if it just performs overall error
  276. /// checks on a master file. When the (immediate) caller needs to do explicit
  277. /// error handling, exceptions are not that a useful tool for error reporting
  278. /// because we cannot separate the normal and error cases anyway, which would
  279. /// be one major advantage when we use exceptions. And, exceptions are
  280. /// generally more expensive, either when it happens or just by being able
  281. /// to handle with \c try and \c catch (depending on the underlying
  282. /// implementation of the exception handling). For these reasons, some of
  283. /// this class does not throw for an error that would be reported as an
  284. /// exception in other classes.
  285. class MasterLexer : public boost::noncopyable {
  286. friend class master_lexer_internal::State;
  287. public:
  288. /// \brief Exception thrown when we fail to read from the input
  289. /// stream or file.
  290. class ReadError : public Unexpected {
  291. public:
  292. ReadError(const char* file, size_t line, const char* what) :
  293. Unexpected(file, line, what)
  294. {}
  295. };
  296. /// \brief Exception thrown from a wrapper version of
  297. /// \c MasterLexer::getNextToken() for non fatal errors.
  298. ///
  299. /// See the method description for more details.
  300. ///
  301. /// The \c token_ member variable (read-only) is set to a \c MasterToken
  302. /// object of type ERROR indicating the reason for the error.
  303. class LexerError : public isc::dns::Exception {
  304. public:
  305. LexerError(const char* file, size_t line, MasterToken error_token) :
  306. isc::dns::Exception(file, line, error_token.getErrorText().c_str()),
  307. token_(error_token)
  308. {}
  309. const MasterToken token_;
  310. };
  311. /// \brief Special value for input source size meaning "unknown".
  312. ///
  313. /// This constant value will be used as a return value of
  314. /// \c getTotalSourceSize() when the size of one of the pushed sources
  315. /// is unknown. Note that this value itself is a valid integer in the
  316. /// range of the type, so there's still a small possibility of
  317. /// ambiguity. In practice, however, the value should be sufficiently
  318. /// large that should eliminate the possibility.
  319. static const size_t SOURCE_SIZE_UNKNOWN;
  320. /// \brief Options for getNextToken.
  321. ///
  322. /// A compound option, indicating multiple options are set, can be
  323. /// specified using the logical OR operator (operator|()).
  324. enum Options {
  325. NONE = 0, ///< No option
  326. INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
  327. ///< end-of-line
  328. QSTRING = 2, ///< recognize quoted string
  329. NUMBER = 4 ///< recognize numeric text as integer
  330. };
  331. /// \brief The constructor.
  332. ///
  333. /// \throw std::bad_alloc Internal resource allocation fails (rare case).
  334. MasterLexer();
  335. /// \brief The destructor.
  336. ///
  337. /// It internally closes any remaining input sources.
  338. ~MasterLexer();
  339. /// \brief Open a file and make it the current input source of MasterLexer.
  340. ///
  341. /// The opened file can be explicitly closed by the \c popSource() method;
  342. /// if \c popSource() is not called within the lifetime of the
  343. /// \c MasterLexer, it will be closed in the destructor.
  344. ///
  345. /// In the case possible system errors in opening the file (most likely
  346. /// because of specifying a non-existent or unreadable file), it returns
  347. /// false, and if the optional \c error parameter is non NULL, it will be
  348. /// set to a description of the error (any existing content of the string
  349. /// will be discarded). If opening the file succeeds, the given
  350. /// \c error parameter will be intact.
  351. ///
  352. /// Note that this method has two styles of error reporting: one by
  353. /// returning \c false (and setting \c error optionally) and the other
  354. /// by throwing an exception. See the note for the class description
  355. /// about the distinction.
  356. ///
  357. /// \throw InvalidParameter filename is NULL
  358. /// \param filename A non NULL string specifying a master file
  359. /// \param error If non null, a placeholder to set error description in
  360. /// case of failure.
  361. ///
  362. /// \return true if pushing the file succeeds; false otherwise.
  363. bool pushSource(const char* filename, std::string* error = NULL);
  364. /// \brief Make the given stream the current input source of MasterLexer.
  365. ///
  366. /// The caller still holds the ownership of the passed stream; it's the
  367. /// caller's responsibility to keep it valid as long as it's used in
  368. /// \c MasterLexer or to release any resource for the stream after that.
  369. /// The caller can explicitly tell \c MasterLexer to stop using the
  370. /// stream by calling the \c popSource() method.
  371. ///
  372. /// The data in \c input must be complete at the time of this call.
  373. /// The behavior of the lexer is undefined if the caller builds or adds
  374. /// data in \c input after pushing it.
  375. ///
  376. /// Except for rare case system errors such as memory allocation failure,
  377. /// this method is generally expected to be exception free. However,
  378. /// it can still throw if it encounters an unexpected failure when it
  379. /// tries to identify the "size" of the input source (see
  380. /// \c getTotalSourceSize()). It's an unexpected result unless the
  381. /// caller intentionally passes a broken stream; otherwise it would mean
  382. /// some system-dependent unexpected behavior or possibly an internal bug.
  383. /// In these cases it throws an \c Unexpected exception. Note that
  384. /// this version of the method doesn't return a boolean unlike the
  385. /// other version that takes a file name; since this failure is really
  386. /// unexpected and can be critical, it doesn't make sense to give the
  387. /// caller an option to continue (other than by explicitly catching the
  388. /// exception).
  389. ///
  390. /// \throw Unexpected An unexpected failure happens in initialization.
  391. ///
  392. /// \param input An input stream object that produces textual
  393. /// representation of DNS RRs.
  394. void pushSource(std::istream& input);
  395. /// \brief Stop using the most recently opened input source (file or
  396. /// stream).
  397. ///
  398. /// If it's a file, the previously opened file will be closed internally.
  399. /// If it's a stream, \c MasterLexer will simply stop using
  400. /// the stream; the caller can assume it will be never used in
  401. /// \c MasterLexer thereafter.
  402. ///
  403. /// This method must not be called when there is no source pushed for
  404. /// \c MasterLexer. This method is otherwise exception free.
  405. ///
  406. /// \throw isc::InvalidOperation Called with no pushed source.
  407. void popSource();
  408. /// \brief Get number of sources inside the lexer.
  409. ///
  410. /// This method never throws.
  411. size_t getSourceCount() const;
  412. /// \brief Return the name of the current input source name.
  413. ///
  414. /// If it's a file, it will be the C string given at the corresponding
  415. /// \c pushSource() call, that is, its filename. If it's a stream, it will
  416. /// be formatted as \c "stream-%p" where \c %p is hex representation
  417. /// of the address of the stream object.
  418. ///
  419. /// If there is no opened source at the time of the call, this method
  420. /// returns an empty string.
  421. ///
  422. /// \throw std::bad_alloc Resource allocation failed for string
  423. /// construction (rare case)
  424. ///
  425. /// \return A string representation of the current source (see the
  426. /// description)
  427. std::string getSourceName() const;
  428. /// \brief Return the input source line number.
  429. ///
  430. /// If there is an opened source, the return value will be a non-0
  431. /// integer indicating the line number of the current source where
  432. /// the \c MasterLexer is currently working. The expected usage of
  433. /// this value is to print a helpful error message when parsing fails
  434. /// by specifically identifying the position of the error.
  435. ///
  436. /// If there is no opened source at the time of the call, this method
  437. /// returns 0.
  438. ///
  439. /// \throw None
  440. ///
  441. /// \return The current line number of the source (see the description)
  442. size_t getSourceLine() const;
  443. /// \brief Return the total size of pushed sources.
  444. ///
  445. /// This method returns the sum of the size of sources that have been
  446. /// pushed to the lexer by the time of the call. It would give the
  447. /// caller some hint about the amount of data the lexer is working on.
  448. ///
  449. /// The size of a normal file is equal to the file size at the time of
  450. /// the source is pushed. The size of other type of input stream is
  451. /// the size of the data available in the stream at the time of the
  452. /// source is pushed.
  453. ///
  454. /// In some special cases, it's possible that the size of the file or
  455. /// stream is unknown. It happens, for example, if the standard input
  456. /// is associated with a pipe from the output of another process and it's
  457. /// specified as an input source. If the size of some of the pushed
  458. /// source is unknown, this method returns SOURCE_SIZE_UNKNOWN.
  459. ///
  460. /// The total size won't change when a source is popped. So the return
  461. /// values of this method will monotonically increase or
  462. /// \c SOURCE_SIZE_UNKNOWN; once it returns \c SOURCE_SIZE_UNKNOWN,
  463. /// any subsequent call will also result in that value, by the above
  464. /// definition.
  465. ///
  466. /// Before pushing any source, it returns 0.
  467. ///
  468. /// \throw None
  469. size_t getTotalSourceSize() const;
  470. /// \brief Return the position of lexer in the pushed sources so far.
  471. ///
  472. /// This method returns the position in terms of the number of recognized
  473. /// characters from all sources that have been pushed by the time of the
  474. /// call. Conceptually, the position in a single source is the offset
  475. /// from the beginning of the file or stream to the current "read cursor"
  476. /// of the lexer. The return value of this method is the sum of the
  477. /// positions in all the pushed sources. If any of the sources has
  478. /// already been popped, the position of the source at the time of the
  479. /// pop operation will be used for the calculation.
  480. ///
  481. /// If the lexer reaches the end for each of all the pushed sources,
  482. /// the return value should be equal to that of \c getTotalSourceSize().
  483. /// It's generally expected that a source is popped when the lexer
  484. /// reaches the end of the source. So, when the application of this
  485. /// class parses all contents of all sources, possibly with multiple
  486. /// pushes and pops, the return value of this method and
  487. /// \c getTotalSourceSize() should be identical (unless the latter
  488. /// returns SOURCE_SIZE_UNKNOWN). But this is not necessarily
  489. /// guaranteed as the application can pop a source in the middle of
  490. /// parsing it.
  491. ///
  492. /// Before pushing any source, it returns 0.
  493. ///
  494. /// The return values of this method and \c getTotalSourceSize() would
  495. /// give the caller an idea of the progress of the lexer at the time of
  496. /// the call. Note, however, that since it's not predictable whether
  497. /// more sources will be pushed after the call, the progress determined
  498. /// this way may not make much sense; it can only give an informational
  499. /// hint of the progress.
  500. ///
  501. /// Note that the conceptual "read cursor" would move backward after a
  502. /// call to \c ungetToken(), in which case this method will return a
  503. /// smaller value. That is, unlike \c getTotalSourceSize(), return
  504. /// values of this method may not always monotonically increase.
  505. ///
  506. /// \throw None
  507. size_t getPosition() const;
  508. /// \brief Parse and return another token from the input.
  509. ///
  510. /// It reads a bit of the last opened source and produces another token
  511. /// found in it.
  512. ///
  513. /// This method does not provide the strong exception guarantee. Generally,
  514. /// if it throws, the object should not be used any more and should be
  515. /// discarded. It was decided all the exceptions thrown from here are
  516. /// serious enough that aborting the loading process is the only reasonable
  517. /// recovery anyway, so the strong exception guarantee is not needed.
  518. ///
  519. /// \param options The options can be used to modify the tokenization.
  520. /// The method can be made reporting things which are usually ignored
  521. /// by this parameter. Multiple options can be passed at once by
  522. /// bitwise or (eg. option1 | option 2). See description of available
  523. /// options.
  524. /// \return Next token found in the input. Note that the token refers to
  525. /// some internal data in the lexer. It is valid only until
  526. /// getNextToken or ungetToken is called. Also, the token becomes
  527. /// invalid when the lexer is destroyed.
  528. /// \throw isc::InvalidOperation in case the source is not available. This
  529. /// may mean the pushSource() has not been called yet, or that the
  530. /// current source has been read past the end.
  531. /// \throw ReadError in case there's problem reading from the underlying
  532. /// source (eg. I/O error in the file on the disk).
  533. /// \throw std::bad_alloc in case allocation of some internal resources
  534. /// or the token fail.
  535. const MasterToken& getNextToken(Options options = NONE);
  536. /// \brief Parse the input for the expected type of token.
  537. ///
  538. /// This method is a wrapper of the other version, customized for the case
  539. /// where a particular type of token is expected as the next one.
  540. /// More specifically, it's intended to be used to get tokens for RDATA
  541. /// fields. Since most RDATA types of fixed format, the token type is
  542. /// often predictable and the method interface can be simplified.
  543. ///
  544. /// This method basically works as follows: it gets the type of the
  545. /// expected token, calls the other version of \c getNextToken(Options),
  546. /// and returns the token if it's of the expected type (due to the usage
  547. /// assumption this should be normally the case). There are some non
  548. /// trivial details though:
  549. ///
  550. /// - If the expected type is MasterToken::QSTRING, both quoted and
  551. /// unquoted strings are recognized and returned.
  552. /// - If the optional \c eol_ok parameter is \c true (very rare case),
  553. /// MasterToken::END_OF_LINE and MasterToken::END_OF_FILE are recognized
  554. /// and returned if they are found instead of the expected type of
  555. /// token.
  556. /// - If the next token is not of the expected type (including the case
  557. /// a number is expected but it's out of range), ungetToken() is
  558. /// internally called so the caller can re-read that token.
  559. /// - If other types or errors (such as unbalanced parentheses) are
  560. /// detected, the erroneous part isn't "ungotten"; the caller can
  561. /// continue parsing after that part.
  562. ///
  563. /// In some very rare cases where the RDATA has an optional trailing field,
  564. /// the \c eol_ok parameter would be set to \c true. This way the caller
  565. /// can handle both cases (the field does or does not exist) by a single
  566. /// call to this method. In all other cases \c eol_ok should be set to
  567. /// \c false, and that is the default and can be omitted.
  568. ///
  569. /// Unlike the other version of \c getNextToken(Options), this method
  570. /// throws an exception of type \c LexerError for non fatal errors such as
  571. /// broken syntax or encountering an unexpected type of token. This way
  572. /// the caller can write RDATA parser code without bothering to handle
  573. /// errors for each field. For example, pseudo parser code for MX RDATA
  574. /// would look like this:
  575. /// \code
  576. /// const uint32_t pref =
  577. /// lexer.getNextToken(MasterToken::NUMBER).getNumber();
  578. /// // check if pref is the uint16_t range; no other check is needed.
  579. /// const Name mx(lexer.getNextToken(MasterToken::STRING).getString());
  580. /// \endcode
  581. ///
  582. /// In the case where \c LexerError exception is thrown, it's expected
  583. /// to be handled comprehensively for the parser of the RDATA or at a
  584. /// higher layer. The \c token_ member variable of the corresponding
  585. /// \c LexerError exception object stores a token of type
  586. /// \c MasterToken::ERROR that indicates the reason for the error.
  587. ///
  588. /// Due to the specific intended usage of this method, only a subset
  589. /// of \c MasterToken::Type values are acceptable for the \c expect
  590. /// parameter: \c MasterToken::STRING, \c MasterToken::QSTRING, and
  591. /// \c MasterToken::NUMBER. Specifying other values will result in
  592. /// an \c InvalidParameter exception.
  593. ///
  594. /// \throw InvalidParameter The expected token type is not allowed for
  595. /// this method.
  596. /// \throw LexerError The lexer finds non fatal error or it finds an
  597. /// \throw other Anything the other version of getNextToken() can throw.
  598. ///
  599. /// \param expect Expected type of token. Must be either STRING, QSTRING,
  600. /// or NUMBER.
  601. /// \param eol_ok \c true iff END_OF_LINE or END_OF_FILE is acceptable.
  602. /// \return The expected type of token.
  603. const MasterToken& getNextToken(MasterToken::Type expect,
  604. bool eol_ok = false);
  605. /// \brief Return the last token back to the lexer.
  606. ///
  607. /// The method undoes the lasts call to getNextToken(). If you call the
  608. /// getNextToken() again with the same options, it'll return the same
  609. /// token. If the options are different, it may return a different token,
  610. /// but it acts as if the previous getNextToken() was never called.
  611. ///
  612. /// It is possible to return only one token back in time (you can't call
  613. /// ungetToken() twice in a row without calling getNextToken() in between
  614. /// successfully).
  615. ///
  616. /// It does not work after change of source (by pushSource or popSource).
  617. ///
  618. /// \throw isc::InvalidOperation If called second time in a row or if
  619. /// getNextToken() was not called since the last change of the source.
  620. void ungetToken();
  621. private:
  622. struct MasterLexerImpl;
  623. MasterLexerImpl* impl_;
  624. };
  625. /// \brief Operator to combine \c MasterLexer options
  626. ///
  627. /// This is a trivial shortcut so that compound options can be specified
  628. /// in an intuitive way.
  629. inline MasterLexer::Options
  630. operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
  631. return (static_cast<MasterLexer::Options>(
  632. static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
  633. }
  634. } // namespace dns
  635. } // namespace isc
  636. #endif // MASTER_LEXER_H
  637. // Local Variables:
  638. // mode: c++
  639. // End: