|
@@ -20,6 +20,7 @@
|
|
|
|
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <boost/shared_ptr.hpp>
|
|
|
|
|
|
|
|
+#include <bitset>
|
|
#include <cassert>
|
|
#include <cassert>
|
|
#include <string>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <vector>
|
|
@@ -35,7 +36,16 @@ using namespace master_lexer_internal;
|
|
struct MasterLexer::MasterLexerImpl {
|
|
struct MasterLexer::MasterLexerImpl {
|
|
MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
|
|
MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
|
|
paren_count_(0), last_was_eol_(false)
|
|
paren_count_(0), last_was_eol_(false)
|
|
- {}
|
|
|
|
|
|
+ {
|
|
|
|
+ separators_.set('\r');
|
|
|
|
+ separators_.set('\n');
|
|
|
|
+ separators_.set(' ');
|
|
|
|
+ separators_.set('\t');
|
|
|
|
+ separators_.set('(');
|
|
|
|
+ separators_.set(')');
|
|
|
|
+ esc_separators_.set('\r');
|
|
|
|
+ esc_separators_.set('\n');
|
|
|
|
+ }
|
|
|
|
|
|
// A helper method to skip possible comments toward the end of EOL or EOF.
|
|
// A helper method to skip possible comments toward the end of EOL or EOF.
|
|
// commonly used by state classes. It returns the corresponding "end-of"
|
|
// commonly used by state classes. It returns the corresponding "end-of"
|
|
@@ -53,6 +63,14 @@ struct MasterLexer::MasterLexerImpl {
|
|
return (c);
|
|
return (c);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ bool isTokenEnd(int c, bool escaped) {
|
|
|
|
+ if (c == InputSource::END_OF_STREAM) {
|
|
|
|
+ return (true);
|
|
|
|
+ }
|
|
|
|
+ return (escaped ? esc_separators_.test(c & 0x7f) :
|
|
|
|
+ separators_.test(c & 0x7f));
|
|
|
|
+ }
|
|
|
|
+
|
|
std::vector<InputSourcePtr> sources_;
|
|
std::vector<InputSourcePtr> sources_;
|
|
InputSource* source_; // current source (NULL if sources_ is empty)
|
|
InputSource* source_; // current source (NULL if sources_ is empty)
|
|
Token token_; // currently recognized token (set by a state)
|
|
Token token_; // currently recognized token (set by a state)
|
|
@@ -62,6 +80,13 @@ struct MasterLexer::MasterLexerImpl {
|
|
// The main lexer class does not need these members.
|
|
// The main lexer class does not need these members.
|
|
size_t paren_count_; // nest count of the parentheses
|
|
size_t paren_count_; // nest count of the parentheses
|
|
bool last_was_eol_; // whether the lexer just passed an end-of-line
|
|
bool last_was_eol_; // whether the lexer just passed an end-of-line
|
|
|
|
+
|
|
|
|
+ // Bitmaps that gives whether a given (positive) character should be
|
|
|
|
+ // considered a separator of a string/number token. The esc_ version
|
|
|
|
+ // is a subset of the other, excluding characters that can be ignored
|
|
|
|
+ // if escaped by a backslash.
|
|
|
|
+ std::bitset<128> separators_;
|
|
|
|
+ std::bitset<128> esc_separators_;
|
|
};
|
|
};
|
|
|
|
|
|
MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
|
|
MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
|
|
@@ -305,9 +330,7 @@ String::handle(MasterLexer& lexer) const {
|
|
c = getLexerImpl(lexer)->skipComment(c);
|
|
c = getLexerImpl(lexer)->skipComment(c);
|
|
}
|
|
}
|
|
|
|
|
|
- if (c == '\r' || c == '\n' || c == EOF ||
|
|
|
|
- (!escaped &&
|
|
|
|
- (c == ' ' || c == '\t' || c == '(' || c == ')'))) {
|
|
|
|
|
|
+ if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
|
|
getLexerImpl(lexer)->source_->ungetChar();
|
|
getLexerImpl(lexer)->source_->ungetChar();
|
|
getLexerImpl(lexer)->token_ =
|
|
getLexerImpl(lexer)->token_ =
|
|
MasterLexer::Token(&data.at(0), data.size());
|
|
MasterLexer::Token(&data.at(0), data.size());
|