Browse Source

[2372] handle crlf class

JINMEI Tatuya 12 years ago
parent
commit
0c5e6acbe2
2 changed files with 57 additions and 1 deletions
  1. 21 1
      src/lib/dns/master_lexer.cc
  2. 36 0
      src/lib/dns/tests/master_lexer_state_unittest.cc

+ 21 - 1
src/lib/dns/master_lexer.cc

@@ -166,7 +166,23 @@ public:
 class CRLF : public State {
 public:
     CRLF() {}
-    virtual const State* handle(MasterLexer& /*lexer*/) const {
+    virtual const State* handle(MasterLexer& lexer) const {
+        // We've just seen '\r'.  If this is part of a sequence of '\r\n',
+        // we combine them as a single END-OF-LINE.  Otherwise we treat the
+        // single '\r' as an EOL and continue tokenization from the character
+        // immediately after '\r'.  One tricky case is when there's a comment
+        // between '\r' and '\n'.  This implementation combines these
+        // characters and treats them as a single EOL (the behavior derived
+        // from BIND 9).  Technically this may not be correct, but in practice
+        // the caller wouldn't distinguish this case from one with two EOLs,
+        // so we simplify the process.
+        const int c = getLexerImpl(lexer)->skipComment(
+            getLexerImpl(lexer)->source_->getChar());
+        if (c != '\n') {
+            getLexerImpl(lexer)->source_->ungetChar();
+        }
+        getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
+        getLexerImpl(lexer)->last_was_eol_ = true;
         return (NULL);
     }
 };
@@ -243,6 +259,10 @@ Start::handle(MasterLexer& lexer) const {
                 getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
                 return (NULL);
             }
+        } else if (c == '\r') {
+            if ((options & MasterLexer::END_OF_LINE) != 0) {
+                return (&CRLF_STATE);
+            }
         } else if (c == '(') {
             getLexerImpl(lexer)->last_was_eol_ = false;
             adjustOptionsForParen(options);

+ 36 - 0
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -224,4 +224,40 @@ TEST_F(MasterLexerStateTest, commentAfterParen) {
     EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
 }
 
+TEST_F(MasterLexerStateTest, crlf) {
+    // A sequence of '\r', '\n' is recognized as a single 'end-of-line'
+    ss << "\r\n";
+    EXPECT_EQ(&s_crlf, s_start.handle(lexer)); // recognize '\r'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // recognize '\n'
+    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
+    EXPECT_TRUE(s_start.wasLastEOL(lexer));
+
+    // If EOL isn't expected to be recognized, '\r' is just ignored.
+    ss << "\r\na";
+    State::getStartInstance(lexer, MasterLexer::NONE);
+    EXPECT_EQ(&s_string, s_start.handle(lexer)); // expect to reach 'a'
+    s_string.handle(lexer);     // skip the string; its token isn't checked
+
+    // Single '\r' (not followed by '\n') is recognized as a single
+    // 'end-of-line'
+    ss << "\r ";                // then there will be "initial WS"
+    State::getStartInstance(lexer, common_options); // specify usual options
+    EXPECT_EQ(&s_crlf, s_start.handle(lexer)); // recognize '\r'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // recognize ' ', "unget" it
+    EXPECT_EQ(s_null, s_start.handle(lexer)); // re-recognize ' '
+    EXPECT_EQ(Token::INITIAL_WS, s_start.getToken(lexer).getType());
+
+    ss << "\r;comment\na";      // a comment between '\r' and '\n'
+    EXPECT_EQ(&s_crlf, s_start.handle(lexer)); // recognize '\r'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // skip comments, recognize '\n'
+    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
+    EXPECT_EQ(&s_string, s_start.handle(lexer)); // then the trailing 'a'
+
+    // '\r' then EOF
+    ss << "\r";
+    EXPECT_EQ(&s_crlf, s_start.handle(lexer)); // recognize '\r'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // see EOF, then "unget" it
+    EXPECT_EQ(s_null, s_start.handle(lexer));  // re-recognize EOF
+    EXPECT_EQ(Token::END_OF_FILE, s_start.getToken(lexer).getType());
+}
+
 }