|
@@ -28,225 +28,6 @@ namespace master_lexer_internal {
|
|
|
class State;
|
|
|
}
|
|
|
|
|
|
-/// \brief Tokenizer for parsing DNS master files.
|
|
|
-///
|
|
|
-/// The \c MasterLexer class provides tokenize interfaces for parsing DNS
|
|
|
-/// master files. It understands some special rules of master files as
|
|
|
-/// defined in RFC 1035, such as comments, character escaping, and multi-line
|
|
|
-/// data, and provides the user application with the actual data in a
|
|
|
-/// more convenient form such as a std::string object.
|
|
|
-///
|
|
|
-/// In order to support the $INCLUDE notation, this class is designed to be
|
|
|
-/// able to operate on multiple files or input streams in the nested way.
|
|
|
-/// The \c pushSource() and \c popSource() methods correspond to the push
|
|
|
-/// and pop operations.
|
|
|
-///
|
|
|
-/// While this class is public, it is less likely to be used by normal
|
|
|
-/// applications; it's mainly expected to be used within this library,
|
|
|
-/// specifically by the \c MasterLoader class and \c Rdata implementation
|
|
|
-/// classes.
|
|
|
-///
|
|
|
-/// \note The error handling policy of this class is slightly different from
|
|
|
-/// that of other classes of this library. We generally throw an exception
|
|
|
-/// for an invalid input, whether it's more likely to be a program error or
|
|
|
-/// a "user error", which means an invalid input that comes from outside of
|
|
|
-/// the library. But, this class returns an error code for some certain
|
|
|
-/// types of user errors instead of throwing an exception. Such cases include
|
|
|
-/// a syntax error identified by the lexer or a misspelled file name that
|
|
|
-/// causes a system error at the time of open. This is based on the assumption
|
|
|
-/// that the main user of this class is a parser of master files, where
|
|
|
-/// we want to give an option to ignore some non fatal errors and continue
|
|
|
-/// the parsing. This will be useful if it just performs overall error
|
|
|
-/// checks on a master file. When the (immediate) caller needs to do explicit
|
|
|
-/// error handling, exceptions are not that a useful tool for error reporting
|
|
|
-/// because we cannot separate the normal and error cases anyway, which would
|
|
|
-/// be one major advantage when we use exceptions. And, exceptions are
|
|
|
-/// generally more expensive, either when it happens or just by being able
|
|
|
-/// to handle with \c try and \c catch (depending on the underlying
|
|
|
-/// implementation of the exception handling). For these reasons, some of
|
|
|
-/// this class does not throw for an error that would be reported as an
|
|
|
-/// exception in other classes.
|
|
|
-class MasterLexer {
|
|
|
- friend class master_lexer_internal::State;
|
|
|
-public:
|
|
|
- /// \brief Exception thrown when we fail to read from the input
|
|
|
- /// stream or file.
|
|
|
- struct ReadError : public Unexpected {
|
|
|
- ReadError(const char* file, size_t line, const char* what) :
|
|
|
- Unexpected(file, line, what)
|
|
|
- {}
|
|
|
- };
|
|
|
-
|
|
|
- class Token; // we define it separately for better readability
|
|
|
-
|
|
|
- /// \brief Options for getNextToken.
|
|
|
- ///
|
|
|
- /// A compound option, indicating multiple options are set, can be
|
|
|
- /// specified using the logical OR operator (operator|()).
|
|
|
- enum Options {
|
|
|
- NONE = 0, ///< No option
|
|
|
- INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
|
|
|
- ///< end-of-line
|
|
|
- QSTRING = 2, ///< recognize quoted string
|
|
|
- NUMBER = 4 ///< recognize numeric text as integer
|
|
|
- };
|
|
|
-
|
|
|
- /// \brief The constructor.
|
|
|
- ///
|
|
|
- /// \throw std::bad_alloc Internal resource allocation fails (rare case).
|
|
|
- MasterLexer();
|
|
|
-
|
|
|
- /// \brief The destructor.
|
|
|
- ///
|
|
|
- /// It internally closes any remaining input sources.
|
|
|
- ~MasterLexer();
|
|
|
-
|
|
|
- /// \brief Open a file and make it the current input source of MasterLexer.
|
|
|
- ///
|
|
|
- /// The opened file can be explicitly closed by the \c popSource() method;
|
|
|
- /// if \c popSource() is not called within the lifetime of the
|
|
|
- /// \c MasterLexer, it will be closed in the destructor.
|
|
|
- ///
|
|
|
- /// In the case possible system errors in opening the file (most likely
|
|
|
- /// because of specifying a non-existent or unreadable file), it returns
|
|
|
- /// false, and if the optional \c error parameter is non NULL, it will be
|
|
|
- /// set to a description of the error (any existing content of the string
|
|
|
- /// will be discarded). If opening the file succeeds, the given
|
|
|
- /// \c error parameter will be intact.
|
|
|
- ///
|
|
|
- /// Note that this method has two styles of error reporting: one by
|
|
|
- /// returning \c false (and setting \c error optionally) and the other
|
|
|
- /// by throwing an exception. See the note for the class description
|
|
|
- /// about the distinction.
|
|
|
- ///
|
|
|
- /// \throw InvalidParameter filename is NULL
|
|
|
- /// \param filename A non NULL string specifying a master file
|
|
|
- /// \param error If non null, a placeholder to set error description in
|
|
|
- /// case of failure.
|
|
|
- ///
|
|
|
- /// \return true if pushing the file succeeds; false otherwise.
|
|
|
- bool pushSource(const char* filename, std::string* error = NULL);
|
|
|
-
|
|
|
- /// \brief Make the given stream the current input source of MasterLexer.
|
|
|
- ///
|
|
|
- /// The caller still holds the ownership of the passed stream; it's the
|
|
|
- /// caller's responsibility to keep it valid as long as it's used in
|
|
|
- /// \c MasterLexer or to release any resource for the stream after that.
|
|
|
- /// The caller can explicitly tell \c MasterLexer to stop using the
|
|
|
- /// stream by calling the \c popSource() method.
|
|
|
- ///
|
|
|
- /// \param input An input stream object that produces textual
|
|
|
- /// representation of DNS RRs.
|
|
|
- void pushSource(std::istream& input);
|
|
|
-
|
|
|
- /// \brief Stop using the most recently opened input source (file or
|
|
|
- /// stream).
|
|
|
- ///
|
|
|
- /// If it's a file, the previously opened file will be closed internally.
|
|
|
- /// If it's a stream, \c MasterLexer will simply stop using
|
|
|
- /// the stream; the caller can assume it will be never used in
|
|
|
- /// \c MasterLexer thereafter.
|
|
|
- ///
|
|
|
- /// This method must not be called when there is no source pushed for
|
|
|
- /// \c MasterLexer. This method is otherwise exception free.
|
|
|
- ///
|
|
|
- /// \throw isc::InvalidOperation Called with no pushed source.
|
|
|
- void popSource();
|
|
|
-
|
|
|
- /// \brief Return the name of the current input source name.
|
|
|
- ///
|
|
|
- /// If it's a file, it will be the C string given at the corresponding
|
|
|
- /// \c pushSource() call, that is, its filename. If it's a stream, it will
|
|
|
- /// be formatted as \c "stream-%p" where \c %p is hex representation
|
|
|
- /// of the address of the stream object.
|
|
|
- ///
|
|
|
- /// If there is no opened source at the time of the call, this method
|
|
|
- /// returns an empty string.
|
|
|
- ///
|
|
|
- /// \throw std::bad_alloc Resource allocation failed for string
|
|
|
- /// construction (rare case)
|
|
|
- ///
|
|
|
- /// \return A string representation of the current source (see the
|
|
|
- /// description)
|
|
|
- std::string getSourceName() const;
|
|
|
-
|
|
|
- /// \brief Return the input source line number.
|
|
|
- ///
|
|
|
- /// If there is an opened source, the return value will be a non-0
|
|
|
- /// integer indicating the line number of the current source where
|
|
|
- /// the \c MasterLexer is currently working. The expected usage of
|
|
|
- /// this value is to print a helpful error message when parsing fails
|
|
|
- /// by specifically identifying the position of the error.
|
|
|
- ///
|
|
|
- /// If there is no opened source at the time of the call, this method
|
|
|
- /// returns 0.
|
|
|
- ///
|
|
|
- /// \throw None
|
|
|
- ///
|
|
|
- /// \return The current line number of the source (see the description)
|
|
|
- size_t getSourceLine() const;
|
|
|
-
|
|
|
- /// \brief Parse and return another token from the input.
|
|
|
- ///
|
|
|
- /// It reads a bit of the last opened source and produces another token
|
|
|
- /// found in it.
|
|
|
- ///
|
|
|
- /// This method does not provide the strong exception guarantee. Generally,
|
|
|
- /// if it throws, the object should not be used any more and should be
|
|
|
- /// discarded. It was decided all the exceptions thrown from here are
|
|
|
- /// serious enough that aborting the loading process is the only reasonable
|
|
|
- /// recovery anyway, so the strong exception guarantee is not needed.
|
|
|
- ///
|
|
|
- /// \param options The options can be used to modify the tokenization.
|
|
|
- /// The method can be made reporting things which are usually ignored
|
|
|
- /// by this parameter. Multiple options can be passed at once by
|
|
|
- /// bitwise or (eg. option1 | option 2). See description of available
|
|
|
- /// options.
|
|
|
- /// \return Next token found in the input. Note that the token refers to
|
|
|
- /// some internal data in the lexer. It is valid only until
|
|
|
- /// getNextToken or ungetToken is called. Also, the token becomes
|
|
|
- /// invalid when the lexer is destroyed.
|
|
|
- /// \throw isc::InvalidOperation in case the source is not available. This
|
|
|
- /// may mean the pushSource() has not been called yet, or that the
|
|
|
- /// current source has been read past the end.
|
|
|
- /// \throw ReadError in case there's problem reading from the underlying
|
|
|
- /// source (eg. I/O error in the file on the disk).
|
|
|
- /// \throw std::bad_alloc in case allocation of some internal resources
|
|
|
- /// or the token fail.
|
|
|
- const Token& getNextToken(Options options = NONE);
|
|
|
-
|
|
|
- /// \brief Return the last token back to the lexer.
|
|
|
- ///
|
|
|
- /// The method undoes the lasts call to getNextToken(). If you call the
|
|
|
- /// getNextToken() again with the same options, it'll return the same
|
|
|
- /// token. If the options are different, it may return a different token,
|
|
|
- /// but it acts as if the previous getNextToken() was never called.
|
|
|
- ///
|
|
|
- /// It is possible to return only one token back in time (you can't call
|
|
|
- /// ungetToken() twice in a row without calling getNextToken() in between
|
|
|
- /// successfully).
|
|
|
- ///
|
|
|
- /// It does not work after change of source (by pushSource or popSource).
|
|
|
- ///
|
|
|
- /// \throw isc::InvalidOperation If called second time in a row or if
|
|
|
- /// getNextToken() was not called since the last change of the source.
|
|
|
- void ungetToken();
|
|
|
-
|
|
|
-private:
|
|
|
- struct MasterLexerImpl;
|
|
|
- MasterLexerImpl* impl_;
|
|
|
-};
|
|
|
-
|
|
|
-/// \brief Operator to combine \c MasterLexer options
|
|
|
-///
|
|
|
-/// This is a trivial shortcut so that compound options can be specified
|
|
|
-/// in an intuitive way.
|
|
|
-inline MasterLexer::Options
|
|
|
-operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
|
|
|
- return (static_cast<MasterLexer::Options>(
|
|
|
- static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
|
|
|
-}
|
|
|
-
|
|
|
/// \brief Tokens for \c MasterLexer
|
|
|
///
|
|
|
/// This is a simple value-class encapsulating a type of a lexer token and
|
|
@@ -261,7 +42,7 @@ operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
|
|
|
/// (using the default version of copy constructor and assignment operator),
|
|
|
/// but it's mainly for internal implementation convenience. Applications will
|
|
|
/// simply refer to Token object as a reference via the \c MasterLexer class.
|
|
|
-class MasterLexer::Token {
|
|
|
+class MasterToken {
|
|
|
public:
|
|
|
/// \brief Enumeration for token types
|
|
|
///
|
|
@@ -293,6 +74,7 @@ public:
|
|
|
NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
|
|
|
/// error and should never get out of the lexer.
|
|
|
NUMBER_OUT_OF_RANGE, ///< Number was out of range
|
|
|
+ BAD_NUMBER, ///< Number is expected but not recognized
|
|
|
MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
|
|
|
/// (excluding this one). Mainly for internal use.
|
|
|
};
|
|
@@ -308,6 +90,13 @@ public:
|
|
|
/// the region. On the other hand, it is not ensured that the string
|
|
|
/// is nul-terminated. So the usual string manipulation API may not work
|
|
|
/// as expected.
|
|
|
+ ///
|
|
|
+ /// The `MasterLexer` implementation ensures that there are at least
|
|
|
+ /// len + 1 bytes of valid memory region starting from beg, and that
|
|
|
+ /// beg[len] is \0. This means the application can use the bytes as a
|
|
|
+ /// validly nul-terminated C string if there is no intermediate nul
|
|
|
+ /// character. Note also that due to this property beg is always non
|
|
|
+ /// NULL; for an empty string len will be set to 0 and beg[0] is \0.
|
|
|
struct StringRegion {
|
|
|
const char* beg; ///< The start address of the string
|
|
|
size_t len; ///< The length of the string in bytes
|
|
@@ -318,7 +107,7 @@ public:
|
|
|
/// \throw InvalidParameter A value type token is specified.
|
|
|
/// \param type The type of the token. It must indicate a non-value
|
|
|
/// type (not larger than \c NOVALUE_TYPE_MAX).
|
|
|
- explicit Token(Type type) : type_(type) {
|
|
|
+ explicit MasterToken(Type type) : type_(type) {
|
|
|
if (type > NOVALUE_TYPE_MAX) {
|
|
|
isc_throw(InvalidParameter, "Token per-type constructor "
|
|
|
"called with invalid type: " << type);
|
|
@@ -340,7 +129,7 @@ public:
|
|
|
/// \param str_beg The start address of the string
|
|
|
/// \param str_len The size of the string in bytes
|
|
|
/// \param quoted true if it's a quoted string; false otherwise.
|
|
|
- Token(const char* str_beg, size_t str_len, bool quoted = false) :
|
|
|
+ MasterToken(const char* str_beg, size_t str_len, bool quoted = false) :
|
|
|
type_(quoted ? QSTRING : STRING)
|
|
|
{
|
|
|
val_.str_region_.beg = str_beg;
|
|
@@ -351,7 +140,7 @@ public:
|
|
|
///
|
|
|
/// \brief number An unsigned 32-bit integer corresponding to the token
|
|
|
/// value.
|
|
|
- explicit Token(uint32_t number) : type_(NUMBER) {
|
|
|
+ explicit MasterToken(uint32_t number) : type_(NUMBER) {
|
|
|
val_.number_ = number;
|
|
|
}
|
|
|
|
|
@@ -359,7 +148,7 @@ public:
|
|
|
///
|
|
|
/// \throw InvalidParameter Invalid error code value is specified.
|
|
|
/// \brief error_code A pre-defined constant of \c ErrorCode.
|
|
|
- explicit Token(ErrorCode error_code) : type_(ERROR) {
|
|
|
+ explicit MasterToken(ErrorCode error_code) : type_(ERROR) {
|
|
|
if (!(error_code < MAX_ERROR_CODE)) {
|
|
|
isc_throw(InvalidParameter, "Invalid master lexer error code: "
|
|
|
<< error_code);
|
|
@@ -476,6 +265,310 @@ private:
|
|
|
} val_;
|
|
|
};
|
|
|
|
|
|
+/// \brief Tokenizer for parsing DNS master files.
|
|
|
+///
|
|
|
+/// The \c MasterLexer class provides tokenize interfaces for parsing DNS
|
|
|
+/// master files. It understands some special rules of master files as
|
|
|
+/// defined in RFC 1035, such as comments, character escaping, and multi-line
|
|
|
+/// data, and provides the user application with the actual data in a
|
|
|
+/// more convenient form such as a std::string object.
|
|
|
+///
|
|
|
+/// In order to support the $INCLUDE notation, this class is designed to be
|
|
|
+/// able to operate on multiple files or input streams in the nested way.
|
|
|
+/// The \c pushSource() and \c popSource() methods correspond to the push
|
|
|
+/// and pop operations.
|
|
|
+///
|
|
|
+/// While this class is public, it is less likely to be used by normal
|
|
|
+/// applications; it's mainly expected to be used within this library,
|
|
|
+/// specifically by the \c MasterLoader class and \c Rdata implementation
|
|
|
+/// classes.
|
|
|
+///
|
|
|
+/// \note The error handling policy of this class is slightly different from
|
|
|
+/// that of other classes of this library. We generally throw an exception
|
|
|
+/// for an invalid input, whether it's more likely to be a program error or
|
|
|
+/// a "user error", which means an invalid input that comes from outside of
|
|
|
+/// the library. But, this class returns an error code for some certain
|
|
|
+/// types of user errors instead of throwing an exception. Such cases include
|
|
|
+/// a syntax error identified by the lexer or a misspelled file name that
|
|
|
+/// causes a system error at the time of open. This is based on the assumption
|
|
|
+/// that the main user of this class is a parser of master files, where
|
|
|
+/// we want to give an option to ignore some non fatal errors and continue
|
|
|
+/// the parsing. This will be useful if it just performs overall error
|
|
|
+/// checks on a master file. When the (immediate) caller needs to do explicit
|
|
|
+/// error handling, exceptions are not that useful a tool for error reporting
|
|
|
+/// because we cannot separate the normal and error cases anyway, which would
|
|
|
+/// be one major advantage when we use exceptions. And, exceptions are
|
|
|
+/// generally more expensive, either when it happens or just by being able
|
|
|
+/// to handle with \c try and \c catch (depending on the underlying
|
|
|
+/// implementation of the exception handling). For these reasons, some
|
|
|
+/// methods of this class do not throw for an error that would be reported
|
|
|
+/// as an exception in other classes.
|
|
|
+class MasterLexer {
|
|
|
+ friend class master_lexer_internal::State;
|
|
|
+public:
|
|
|
+ /// \brief Exception thrown when reading from the input stream or file
|
|
|
+ /// fails.
|
|
|
+ class ReadError : public Unexpected {
|
|
|
+ public:
|
|
|
+ ReadError(const char* file, size_t line, const char* what) :
|
|
|
+ Unexpected(file, line, what) // arguments are forwarded unchanged
|
|
|
+ {}
|
|
|
+ };
|
|
|
+
|
|
|
+ /// \brief Exception thrown from a wrapper version of
|
|
|
+ /// \c MasterLexer::getNextToken() for non fatal errors.
|
|
|
+ ///
|
|
|
+ /// See the getNextToken(MasterToken::Type, bool) description for details.
|
|
|
+ ///
|
|
|
+ /// The \c token_ member variable (read-only) is set to a \c MasterToken
|
|
|
+ /// object of type ERROR indicating the reason for the error.
|
|
|
+ class LexerError : public Exception {
|
|
|
+ public:
|
|
|
+ LexerError(const char* file, size_t line, MasterToken error_token) :
|
|
|
+ Exception(file, line, error_token.getErrorText().c_str()),
|
|
|
+ token_(error_token)
|
|
|
+ {} // the exception text is the error text of the given token
|
|
|
+ const MasterToken token_; ///< Copy of the token passed to the constructor
|
|
|
+ };
|
|
|
+
|
|
|
+ /// \brief Options for getNextToken.
|
|
|
+ ///
|
|
|
+ /// A compound option, indicating multiple options are set, can be
|
|
|
+ /// specified using the bitwise OR operator (operator|()).
|
|
|
+ enum Options {
|
|
|
+ NONE = 0, ///< No option
|
|
|
+ INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
|
|
|
+ ///< end-of-line
|
|
|
+ QSTRING = 2, ///< recognize quoted string
|
|
|
+ NUMBER = 4 ///< recognize numeric text as integer
|
|
|
+ };
|
|
|
+
|
|
|
+ /// \brief The constructor.
|
|
|
+ ///
|
|
|
+ /// \throw std::bad_alloc Internal resource allocation fails (rare case).
|
|
|
+ MasterLexer();
|
|
|
+
|
|
|
+ /// \brief The destructor.
|
|
|
+ ///
|
|
|
+ /// It internally closes any remaining input sources.
|
|
|
+ ~MasterLexer();
|
|
|
+
|
|
|
+ /// \brief Open a file and make it the current input source of MasterLexer.
|
|
|
+ ///
|
|
|
+ /// The opened file can be explicitly closed by the \c popSource() method;
|
|
|
+ /// if \c popSource() is not called within the lifetime of the
|
|
|
+ /// \c MasterLexer, it will be closed in the destructor.
|
|
|
+ ///
|
|
|
+ /// In the case of possible system errors in opening the file (most likely
|
|
|
+ /// because of specifying a non-existent or unreadable file), it returns
|
|
|
+ /// false, and if the optional \c error parameter is non NULL, it will be
|
|
|
+ /// set to a description of the error (any existing content of the string
|
|
|
+ /// will be discarded). If opening the file succeeds, the given
|
|
|
+ /// \c error parameter will be intact.
|
|
|
+ ///
|
|
|
+ /// Note that this method has two styles of error reporting: one by
|
|
|
+ /// returning \c false (and setting \c error optionally) and the other
|
|
|
+ /// by throwing an exception. See the note for the class description
|
|
|
+ /// about the distinction.
|
|
|
+ ///
|
|
|
+ /// \throw InvalidParameter filename is NULL
|
|
|
+ /// \param filename A non NULL string specifying a master file
|
|
|
+ /// \param error If non null, a placeholder to set error description in
|
|
|
+ /// case of failure.
|
|
|
+ ///
|
|
|
+ /// \return true if pushing the file succeeds; false otherwise.
|
|
|
+ bool pushSource(const char* filename, std::string* error = NULL);
|
|
|
+
|
|
|
+ /// \brief Make the given stream the current input source of MasterLexer.
|
|
|
+ ///
|
|
|
+ /// The caller still holds the ownership of the passed stream; it's the
|
|
|
+ /// caller's responsibility to keep it valid as long as it's used in
|
|
|
+ /// \c MasterLexer or to release any resource for the stream after that.
|
|
|
+ /// The caller can explicitly tell \c MasterLexer to stop using the
|
|
|
+ /// stream by calling the \c popSource() method.
|
|
|
+ ///
|
|
|
+ /// \param input An input stream object that produces textual
|
|
|
+ /// representation of DNS RRs.
|
|
|
+ void pushSource(std::istream& input);
|
|
|
+
|
|
|
+ /// \brief Stop using the most recently opened input source (file or
|
|
|
+ /// stream).
|
|
|
+ ///
|
|
|
+ /// If it's a file, the previously opened file will be closed internally.
|
|
|
+ /// If it's a stream, \c MasterLexer will simply stop using
|
|
|
+ /// the stream; the caller can assume it will never be used in
|
|
|
+ /// \c MasterLexer thereafter.
|
|
|
+ ///
|
|
|
+ /// This method must not be called when there is no source pushed for
|
|
|
+ /// \c MasterLexer. This method is otherwise exception free.
|
|
|
+ ///
|
|
|
+ /// \throw isc::InvalidOperation Called with no pushed source.
|
|
|
+ void popSource();
|
|
|
+
|
|
|
+ /// \brief Return the name of the current input source.
|
|
|
+ ///
|
|
|
+ /// If it's a file, it will be the C string given at the corresponding
|
|
|
+ /// \c pushSource() call, that is, its filename. If it's a stream, it will
|
|
|
+ /// be formatted as \c "stream-%p" where \c %p is hex representation
|
|
|
+ /// of the address of the stream object.
|
|
|
+ ///
|
|
|
+ /// If there is no opened source at the time of the call, this method
|
|
|
+ /// returns an empty string.
|
|
|
+ ///
|
|
|
+ /// \throw std::bad_alloc Resource allocation failed for string
|
|
|
+ /// construction (rare case)
|
|
|
+ ///
|
|
|
+ /// \return A string representation of the current source (see the
|
|
|
+ /// description)
|
|
|
+ std::string getSourceName() const;
|
|
|
+
|
|
|
+ /// \brief Return the input source line number.
|
|
|
+ ///
|
|
|
+ /// If there is an opened source, the return value will be a non-0
|
|
|
+ /// integer indicating the line number of the current source where
|
|
|
+ /// the \c MasterLexer is currently working. The expected usage of
|
|
|
+ /// this value is to print a helpful error message when parsing fails
|
|
|
+ /// by specifically identifying the position of the error.
|
|
|
+ ///
|
|
|
+ /// If there is no opened source at the time of the call, this method
|
|
|
+ /// returns 0.
|
|
|
+ ///
|
|
|
+ /// \throw None
|
|
|
+ ///
|
|
|
+ /// \return The current line number of the source (see the description)
|
|
|
+ size_t getSourceLine() const;
|
|
|
+
|
|
|
+ /// \brief Parse and return another token from the input.
|
|
|
+ ///
|
|
|
+ /// It reads a bit of the last opened source and produces another token
|
|
|
+ /// found in it.
|
|
|
+ ///
|
|
|
+ /// This method does not provide the strong exception guarantee. Generally,
|
|
|
+ /// if it throws, the object should not be used any more and should be
|
|
|
+ /// discarded. It was decided all the exceptions thrown from here are
|
|
|
+ /// serious enough that aborting the loading process is the only reasonable
|
|
|
+ /// recovery anyway, so the strong exception guarantee is not needed.
|
|
|
+ ///
|
|
|
+ /// \param options The options can be used to modify the tokenization.
|
|
|
+ /// The method can be made reporting things which are usually ignored
|
|
|
+ /// by this parameter. Multiple options can be passed at once by
|
|
|
+ /// bitwise or (eg. option1 | option2). See description of available
|
|
|
+ /// options.
|
|
|
+ /// \return Next token found in the input. Note that the token refers to
|
|
|
+ /// some internal data in the lexer. It is valid only until
|
|
|
+ /// getNextToken or ungetToken is called. Also, the token becomes
|
|
|
+ /// invalid when the lexer is destroyed.
|
|
|
+ /// \throw isc::InvalidOperation in case the source is not available. This
|
|
|
+ /// may mean the pushSource() has not been called yet, or that the
|
|
|
+ /// current source has been read past the end.
|
|
|
+ /// \throw ReadError in case there's problem reading from the underlying
|
|
|
+ /// source (eg. I/O error in the file on the disk).
|
|
|
+ /// \throw std::bad_alloc in case allocation of some internal resources
|
|
|
+ /// or the token fail.
|
|
|
+ const MasterToken& getNextToken(Options options = NONE);
|
|
|
+
|
|
|
+ /// \brief Parse the input for the expected type of token.
|
|
|
+ ///
|
|
|
+ /// This method is a wrapper of the other version, customized for the case
|
|
|
+ /// where a particular type of token is expected as the next one.
|
|
|
+ /// More specifically, it's intended to be used to get tokens for RDATA
|
|
|
+ /// fields. Since most RDATA types are of fixed format, the token type is
|
|
|
+ /// often predictable and the method interface can be simplified.
|
|
|
+ ///
|
|
|
+ /// This method basically works as follows: it gets the type of the
|
|
|
+ /// expected token, calls the other version of \c getNextToken(Options),
|
|
|
+ /// and returns the token if it's of the expected type (due to the usage
|
|
|
+ /// assumption this should be normally the case). There are some non
|
|
|
+ /// trivial details though:
|
|
|
+ ///
|
|
|
+ /// - If the expected type is MasterToken::QSTRING, both quoted and
|
|
|
+ /// unquoted strings are recognized and returned.
|
|
|
+ /// - If the optional \c eol_ok parameter is \c true (very rare case),
|
|
|
+ /// MasterToken::END_OF_LINE and MasterToken::END_OF_FILE are recognized
|
|
|
+ /// and returned if they are found instead of the expected type of
|
|
|
+ /// token.
|
|
|
+ /// - If the next token is not of the expected type (including the case
|
|
|
+ /// a number is expected but it's out of range), ungetToken() is
|
|
|
+ /// internally called so the caller can re-read that token.
|
|
|
+ /// - If other types of errors (such as unbalanced parentheses) are
|
|
|
+ /// detected, the erroneous part isn't "ungotten"; the caller can
|
|
|
+ /// continue parsing after that part.
|
|
|
+ ///
|
|
|
+ /// In some very rare cases where the RDATA has an optional trailing field,
|
|
|
+ /// the \c eol_ok parameter would be set to \c true. This way the caller
|
|
|
+ /// can handle both cases (the field does or does not exist) by a single
|
|
|
+ /// call to this method. In all other cases \c eol_ok should be set to
|
|
|
+ /// \c false, and that is the default and can be omitted.
|
|
|
+ ///
|
|
|
+ /// Unlike the other version of \c getNextToken(Options), this method
|
|
|
+ /// throws an exception of type \c LexerError for non fatal errors such as
|
|
|
+ /// broken syntax or encountering an unexpected type of token. This way
|
|
|
+ /// the caller can write RDATA parser code without bothering to handle
|
|
|
+ /// errors for each field. For example, pseudo parser code for MX RDATA
|
|
|
+ /// would look like this:
|
|
|
+ /// \code
|
|
|
+ /// const uint32_t pref =
|
|
|
+ /// lexer.getNextToken(MasterToken::NUMBER).getNumber();
|
|
|
+ /// // check if pref is in the uint16_t range; no other check is needed.
|
|
|
+ /// const Name mx(lexer.getNextToken(MasterToken::STRING).getString());
|
|
|
+ /// \endcode
|
|
|
+ ///
|
|
|
+ /// In the case where \c LexerError exception is thrown, it's expected
|
|
|
+ /// to be handled comprehensively for the parser of the RDATA or at a
|
|
|
+ /// higher layer. The \c token_ member variable of the corresponding
|
|
|
+ /// \c LexerError exception object stores a token of type
|
|
|
+ /// \c MasterToken::ERROR that indicates the reason for the error.
|
|
|
+ ///
|
|
|
+ /// Due to the specific intended usage of this method, only a subset
|
|
|
+ /// of \c MasterToken::Type values are acceptable for the \c expect
|
|
|
+ /// parameter: \c MasterToken::STRING, \c MasterToken::QSTRING, and
|
|
|
+ /// \c MasterToken::NUMBER. Specifying other values will result in
|
|
|
+ /// an \c InvalidParameter exception.
|
|
|
+ ///
|
|
|
+ /// \throw InvalidParameter The expected token type is not allowed for
|
|
|
+ /// this method.
|
|
|
+ /// \throw LexerError The lexer finds non fatal error or it finds an
|
|
|
+ /// unexpected type of token.
|
|
|
+ /// \throw other Anything the other version of getNextToken() can throw.
|
|
|
+ ///
|
|
|
+ /// \param expect Expected type of token. Must be either STRING, QSTRING,
|
|
|
+ /// or NUMBER.
|
|
|
+ /// \param eol_ok \c true iff END_OF_LINE or END_OF_FILE is acceptable.
|
|
|
+ /// \return The expected type of token.
|
|
|
+ const MasterToken& getNextToken(MasterToken::Type expect,
|
|
|
+ bool eol_ok = false);
|
|
|
+
|
|
|
+ /// \brief Return the last token back to the lexer.
|
|
|
+ ///
|
|
|
+ /// The method undoes the last call to getNextToken(). If you call the
|
|
|
+ /// getNextToken() again with the same options, it'll return the same
|
|
|
+ /// token. If the options are different, it may return a different token,
|
|
|
+ /// but it acts as if the previous getNextToken() was never called.
|
|
|
+ ///
|
|
|
+ /// It is possible to return only one token back in time (you can't call
|
|
|
+ /// ungetToken() twice in a row without calling getNextToken() in between
|
|
|
+ /// successfully).
|
|
|
+ ///
|
|
|
+ /// It does not work after change of source (by pushSource or popSource).
|
|
|
+ ///
|
|
|
+ /// \throw isc::InvalidOperation If called second time in a row or if
|
|
|
+ /// getNextToken() was not called since the last change of the source.
|
|
|
+ void ungetToken();
|
|
|
+
|
|
|
+private:
|
|
|
+ struct MasterLexerImpl;
|
|
|
+ MasterLexerImpl* impl_;
|
|
|
+};
|
|
|
+
|
|
|
+/// \brief Operator to combine \c MasterLexer options
|
|
|
+///
|
|
|
+/// Bitwise-ORs the two option values (via \c unsigned) so that compound
|
|
|
+/// options such as INITIAL_WS | QSTRING can be written in an intuitive way.
|
|
|
+inline MasterLexer::Options
|
|
|
+operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
|
|
|
+ return (static_cast<MasterLexer::Options>(
|
|
|
+ static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
|
|
|
+}
|
|
|
+
|
|
|
} // namespace dns
|
|
|
} // namespace isc
|
|
|
#endif // MASTER_LEXER_H
|