master_lexer.cc 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. // Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // Permission to use, copy, modify, and/or distribute this software for any
  4. // purpose with or without fee is hereby granted, provided that the above
  5. // copyright notice and this permission notice appear in all copies.
  6. //
  7. // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  8. // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  9. // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  10. // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  11. // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  12. // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  13. // PERFORMANCE OF THIS SOFTWARE.
  14. #include <exceptions/exceptions.h>
  15. #include <dns/master_lexer.h>
  16. #include <dns/master_lexer_inputsource.h>
  17. #include <dns/master_lexer_state.h>
  18. #include <boost/shared_ptr.hpp>
  19. #include <cassert>
  20. #include <string>
  21. #include <sstream>
  22. #include <vector>
  23. namespace isc {
  24. namespace dns {
  25. namespace {
  26. typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
  27. }
  28. using namespace master_lexer_internal;
  29. struct MasterLexer::MasterLexerImpl {
  30. MasterLexerImpl() : source_(NULL), paren_count_(0), last_was_eol_(false),
  31. token_(Token::NOT_STARTED)
  32. {}
  33. int skipComment(int c) {
  34. if (c == ';') {
  35. while (true) {
  36. c = source_->getChar();
  37. if (c == '\n' || c == InputSource::END_OF_STREAM) {
  38. return (c);
  39. }
  40. }
  41. }
  42. return (c);
  43. }
  44. std::vector<InputSourcePtr> sources_;
  45. InputSource* source_; // current source
  46. size_t paren_count_;
  47. bool last_was_eol_;
  48. Token token_;
  49. };
  50. MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
  51. }
  52. MasterLexer::~MasterLexer() {
  53. delete impl_;
  54. }
  55. bool
  56. MasterLexer::pushSource(const char* filename, std::string* error) {
  57. if (filename == NULL) {
  58. isc_throw(InvalidParameter,
  59. "NULL filename for MasterLexer::pushSource");
  60. }
  61. try {
  62. impl_->sources_.push_back(InputSourcePtr(new InputSource(filename)));
  63. } catch (const InputSource::OpenError& ex) {
  64. if (error != NULL) {
  65. *error = ex.what();
  66. }
  67. return (false);
  68. }
  69. impl_->source_ = impl_->sources_.back().get();
  70. return (true);
  71. }
  72. void
  73. MasterLexer::pushSource(std::istream& input) {
  74. impl_->sources_.push_back(InputSourcePtr(new InputSource(input)));
  75. impl_->source_ = impl_->sources_.back().get();
  76. }
  77. void
  78. MasterLexer::popSource() {
  79. if (impl_->sources_.empty()) {
  80. isc_throw(InvalidOperation,
  81. "MasterLexer::popSource on an empty source");
  82. }
  83. impl_->sources_.pop_back();
  84. impl_->source_ = impl_->sources_.empty() ? NULL :
  85. impl_->sources_.back().get();
  86. }
  87. std::string
  88. MasterLexer::getSourceName() const {
  89. if (impl_->sources_.empty()) {
  90. return (std::string());
  91. }
  92. return (impl_->sources_.back()->getName());
  93. }
  94. size_t
  95. MasterLexer::getSourceLine() const {
  96. if (impl_->sources_.empty()) {
  97. return (0);
  98. }
  99. return (impl_->sources_.back()->getCurrentLine());
  100. }
  101. namespace {
  102. const char* const error_text[] = {
  103. "lexer not started", // NOT_STARTED
  104. "unbalanced parentheses", // UNBALANCED_PAREN
  105. "unexpected end of input", // UNEXPECTED_END
  106. "unbalanced quotes" // UNBALANCED_QUOTES
  107. };
  108. const size_t error_text_max_count = sizeof(error_text) / sizeof(error_text[0]);
  109. }
  110. std::string
  111. MasterLexer::Token::getErrorText() const {
  112. if (type_ != ERROR) {
  113. isc_throw(InvalidOperation,
  114. "Token::getErrorText() for non error type");
  115. }
  116. // The class integrity ensures the following:
  117. assert(val_.error_code_ < error_text_max_count);
  118. return (error_text[val_.error_code_]);
  119. }
  120. namespace master_lexer_internal {
  121. typedef MasterLexer::Token Token; // convenience shortcut
  122. bool
  123. State::wasLastEOL(const MasterLexer& lexer) const {
  124. return (lexer.impl_->last_was_eol_);
  125. }
  126. const MasterLexer::Token&
  127. State::getToken(const MasterLexer& lexer) const {
  128. return (lexer.impl_->token_);
  129. }
  130. size_t
  131. State::getParenCount(const MasterLexer& lexer) const {
  132. return (lexer.impl_->paren_count_);
  133. }
  134. class Start : public State {
  135. public:
  136. Start() {}
  137. virtual const State* handle(MasterLexer& lexer,
  138. MasterLexer::Options options) const;
  139. };
  140. class CRLF : public State {
  141. public:
  142. CRLF() {}
  143. virtual const State* handle(MasterLexer& lexer, MasterLexer::Options) const
  144. {
  145. // We've just seen '\r'. If this is part of a sequence of '\r\n',
  146. // we combine them as a single END-OF-LINE. Otherwise we treat the
  147. // single '\r' as an EOL and continue tokeniziation from the character
  148. // immediately after '\r'. One tricky case is that there's a comment
  149. // between '\r' and '\n'. This implementation combines these
  150. // characters and treats them as a single EOL (the behavior derived
  151. // from BIND 9). Technically this may not be correct, but in practice
  152. // the caller wouldn't distinguish this case from the case it has
  153. // two EOLs, so we simplify the process.
  154. const int c = getLexerImpl(lexer)->skipComment(
  155. getLexerImpl(lexer)->source_->getChar());
  156. if (c != '\n') {
  157. getLexerImpl(lexer)->source_->ungetChar();
  158. }
  159. getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
  160. getLexerImpl(lexer)->last_was_eol_ = true;
  161. return (NULL);
  162. }
  163. };
  164. // Currently this is provided mostly as a place holder
  165. class String : public State {
  166. public:
  167. String() {}
  168. virtual const State* handle(MasterLexer& /*lexer*/,
  169. MasterLexer::Options /*options*/) const
  170. {
  171. return (NULL);
  172. }
  173. };
  174. namespace {
  175. const Start START_STATE;
  176. const CRLF CRLF_STATE;
  177. const String STRING_STATE;
  178. }
  179. const State&
  180. State::getInstance(ID state_id) {
  181. switch (state_id) {
  182. case Start:
  183. return (START_STATE);
  184. case CRLF:
  185. return (CRLF_STATE);
  186. case String:
  187. return (STRING_STATE);
  188. }
  189. }
  190. const State*
  191. Start::handle(MasterLexer& lexer, MasterLexer::Options options) const {
  192. size_t& paren_count = getLexerImpl(lexer)->paren_count_; // shortcut
  193. while (true) {
  194. const int c = getLexerImpl(lexer)->skipComment(
  195. getLexerImpl(lexer)->source_->getChar());
  196. if (c == InputSource::END_OF_STREAM) {
  197. getLexerImpl(lexer)->last_was_eol_ = false;
  198. if (paren_count != 0) {
  199. getLexerImpl(lexer)->token_ = Token(Token::UNBALANCED_PAREN);
  200. paren_count = 0; // reset to 0; this helps in lenient mode.
  201. return (NULL);
  202. }
  203. getLexerImpl(lexer)->token_ = Token(Token::END_OF_FILE);
  204. return (NULL);
  205. } else if (c == ' ' || c == '\t') {
  206. // If requested and we are not in (), recognize the initial space.
  207. if (getLexerImpl(lexer)->last_was_eol_ && paren_count == 0 &&
  208. (options & MasterLexer::INITIAL_WS) != 0) {
  209. getLexerImpl(lexer)->last_was_eol_ = false;
  210. getLexerImpl(lexer)->token_ = Token(Token::INITIAL_WS);
  211. return (NULL);
  212. }
  213. continue;
  214. } else if (c == '\n') {
  215. getLexerImpl(lexer)->last_was_eol_ = true;
  216. if (paren_count == 0) { // we don't recognize EOL if we are in ()
  217. getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
  218. return (NULL);
  219. }
  220. } else if (c == '\r') {
  221. if (paren_count == 0) { // check if we are in () (see above)
  222. return (&CRLF_STATE);
  223. }
  224. } else if (c == '(') {
  225. getLexerImpl(lexer)->last_was_eol_ = false;
  226. ++paren_count;
  227. continue;
  228. } else if (c == ')') {
  229. getLexerImpl(lexer)->last_was_eol_ = false;
  230. if (paren_count == 0) {
  231. getLexerImpl(lexer)->token_ = Token(Token::UNBALANCED_PAREN);
  232. return (NULL);
  233. }
  234. --paren_count;
  235. continue;
  236. } else {
  237. // Note: in #2373 we should probably ungetChar().
  238. getLexerImpl(lexer)->last_was_eol_ = false;
  239. return (&STRING_STATE);
  240. }
  241. }
  242. }
  243. } // namespace master_lexer_internal
  244. } // end of namespace dns
  245. } // end of namespace isc