forked from OSchip/llvm-project
Comment lexing: fix lexing to actually work in non-error cases.
llvm-svn: 159963
This commit is contained in:
parent
55831d8b9f
commit
17709ae8d9
|
@ -147,6 +147,11 @@ const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
|
|||
return BufferPtr;
|
||||
}
|
||||
|
||||
/// Returns true if \p C can be the first character of an HTML identifier.
///
/// Only the ASCII letters 'a'-'z' and 'A'-'Z' are accepted; any other
/// character (digits, punctuation, whitespace, NUL) yields false.
bool isHTMLIdentifierStartingCharacter(char C) {
  return (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
}
|
||||
|
||||
bool isHTMLIdentifierCharacter(char C) {
|
||||
return (C >= 'a' && C <= 'z') ||
|
||||
(C >= 'A' && C <= 'Z') ||
|
||||
|
@ -357,7 +362,7 @@ void Lexer::lexCommentText(Token &T) {
|
|||
return;
|
||||
}
|
||||
const char C = *TokenPtr;
|
||||
if (isHTMLIdentifierCharacter(C))
|
||||
if (isHTMLIdentifierStartingCharacter(C))
|
||||
setupAndLexHTMLOpenTag(T);
|
||||
else if (C == '/')
|
||||
setupAndLexHTMLCloseTag(T);
|
||||
|
@ -383,7 +388,7 @@ void Lexer::lexCommentText(Token &T) {
|
|||
TokenPtr++;
|
||||
if (TokenPtr == CommentEnd)
|
||||
break;
|
||||
char C = *TokenPtr;
|
||||
const char C = *TokenPtr;
|
||||
if(C == '\n' || C == '\r' ||
|
||||
C == '\\' || C == '@' || C == '<')
|
||||
break;
|
||||
|
@ -492,7 +497,8 @@ void Lexer::lexVerbatimLineText(Token &T) {
|
|||
}
|
||||
|
||||
void Lexer::setupAndLexHTMLOpenTag(Token &T) {
|
||||
assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
|
||||
assert(BufferPtr[0] == '<' &&
|
||||
isHTMLIdentifierStartingCharacter(BufferPtr[1]));
|
||||
const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
|
||||
|
||||
StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
|
||||
|
@ -501,12 +507,9 @@ void Lexer::setupAndLexHTMLOpenTag(Token &T) {
|
|||
|
||||
BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
|
||||
|
||||
if (BufferPtr != CommentEnd && *BufferPtr == '>') {
|
||||
BufferPtr++;
|
||||
return;
|
||||
}
|
||||
|
||||
if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
|
||||
const char C = *BufferPtr;
|
||||
if (BufferPtr != CommentEnd &&
|
||||
(C == '>' || isHTMLIdentifierStartingCharacter(C)))
|
||||
State = LS_HTMLOpenTag;
|
||||
}
|
||||
|
||||
|
@ -541,7 +544,8 @@ void Lexer::lexHTMLOpenTag(Token &T) {
|
|||
case '>':
|
||||
TokenPtr++;
|
||||
formTokenWithChars(T, TokenPtr, tok::html_greater);
|
||||
break;
|
||||
State = LS_Normal;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -554,7 +558,7 @@ void Lexer::lexHTMLOpenTag(Token &T) {
|
|||
}
|
||||
|
||||
C = *BufferPtr;
|
||||
if (!isHTMLIdentifierCharacter(C) &&
|
||||
if (!isHTMLIdentifierStartingCharacter(C) &&
|
||||
C != '=' && C != '\"' && C != '\'' && C != '>') {
|
||||
State = LS_Normal;
|
||||
return;
|
||||
|
@ -656,8 +660,9 @@ again:
|
|||
EndWhitespace++;
|
||||
|
||||
// Turn any whitespace between comments (and there is only whitespace
|
||||
// between them) into a newline. We have two newlines between C comments
|
||||
// in total (first one was synthesized after a comment).
|
||||
// between them -- guaranteed by comment extraction) into a newline. We
|
||||
// have two newlines between C comments in total (first one was synthesized
|
||||
// after a comment).
|
||||
formTokenWithChars(T, EndWhitespace, tok::newline);
|
||||
|
||||
CommentState = LCS_BeforeComment;
|
||||
|
|
|
@ -802,6 +802,28 @@ TEST_F(CommentLexerTest, HTML1) {
|
|||
}
|
||||
|
||||
// Lexes "// a<2" and expects the '<' followed by a digit to come out as an
// ordinary text token rather than as the start of an HTML tag: the token
// stream is text " a", text "<", text "2", newline.
TEST_F(CommentLexerTest, HTML2) {
  const char *Source =
    "// a<2";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(4U, Toks.size());

  ASSERT_EQ(tok::text, Toks[0].getKind());
  ASSERT_EQ(StringRef(" a"), Toks[0].getText());

  ASSERT_EQ(tok::text, Toks[1].getKind());
  ASSERT_EQ(StringRef("<"), Toks[1].getText());

  ASSERT_EQ(tok::text, Toks[2].getKind());
  ASSERT_EQ(StringRef("2"), Toks[2].getText());

  ASSERT_EQ(tok::newline, Toks[3].getKind());
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML3) {
|
||||
const char *Source =
|
||||
"// < tag";
|
||||
|
||||
|
@ -823,7 +845,7 @@ TEST_F(CommentLexerTest, HTML2) {
|
|||
ASSERT_EQ(tok::newline, Toks[3].getKind());
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML3) {
|
||||
TEST_F(CommentLexerTest, HTML4) {
|
||||
const char *Sources[] = {
|
||||
"// <tag",
|
||||
"// <tag "
|
||||
|
@ -846,7 +868,52 @@ TEST_F(CommentLexerTest, HTML3) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML4) {
|
||||
// Lexes "// <tag 42" and expects the digits after the tag name to be lexed
// as plain text: the token stream is text " ", html_tag_open "tag",
// text "42", newline.
TEST_F(CommentLexerTest, HTML5) {
  const char *Source =
    "// <tag 42";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(4U, Toks.size());

  ASSERT_EQ(tok::text, Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
  ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());

  ASSERT_EQ(tok::text, Toks[2].getKind());
  ASSERT_EQ(StringRef("42"), Toks[2].getText());

  ASSERT_EQ(tok::newline, Toks[3].getKind());
}
|
||||
|
||||
// Lexes "// <tag> Meow" and expects the '>' to be its own html_greater token
// with the remainder lexed as ordinary text: the token stream is text " ",
// html_tag_open "tag", html_greater, text " Meow", newline.
TEST_F(CommentLexerTest, HTML6) {
  const char *Source = "// <tag> Meow";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(5U, Toks.size());

  ASSERT_EQ(tok::text, Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
  ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());

  ASSERT_EQ(tok::html_greater, Toks[2].getKind());

  ASSERT_EQ(tok::text, Toks[3].getKind());
  ASSERT_EQ(StringRef(" Meow"), Toks[3].getText());

  ASSERT_EQ(tok::newline, Toks[4].getKind());
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML7) {
|
||||
const char *Source = "// <tag=";
|
||||
|
||||
std::vector<Token> Toks;
|
||||
|
@ -867,7 +934,35 @@ TEST_F(CommentLexerTest, HTML4) {
|
|||
ASSERT_EQ(tok::newline, Toks[3].getKind());
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML5) {
|
||||
// Lexes "// <tag attr=> Meow" (an attribute with '=' but no value) and
// expects the '>' to still be recognized, with the rest lexed as text: the
// token stream is text " ", html_tag_open "tag", html_ident "attr",
// html_equals, html_greater, text " Meow", newline.
TEST_F(CommentLexerTest, HTML8) {
  const char *Source = "// <tag attr=> Meow";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(7U, Toks.size());

  ASSERT_EQ(tok::text, Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
  ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());

  ASSERT_EQ(tok::html_ident, Toks[2].getKind());
  ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());

  ASSERT_EQ(tok::html_equals, Toks[3].getKind());

  ASSERT_EQ(tok::html_greater, Toks[4].getKind());

  ASSERT_EQ(tok::text, Toks[5].getKind());
  ASSERT_EQ(StringRef(" Meow"), Toks[5].getText());

  ASSERT_EQ(tok::newline, Toks[6].getKind());
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML9) {
|
||||
const char *Sources[] = {
|
||||
"// <tag attr",
|
||||
"// <tag attr "
|
||||
|
@ -893,7 +988,7 @@ TEST_F(CommentLexerTest, HTML5) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML6) {
|
||||
TEST_F(CommentLexerTest, HTML10) {
|
||||
const char *Sources[] = {
|
||||
"// <tag attr=",
|
||||
"// <tag attr ="
|
||||
|
@ -921,7 +1016,7 @@ TEST_F(CommentLexerTest, HTML6) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML7) {
|
||||
TEST_F(CommentLexerTest, HTML11) {
|
||||
const char *Sources[] = {
|
||||
"// <tag attr=\"",
|
||||
"// <tag attr = \"",
|
||||
|
@ -954,7 +1049,7 @@ TEST_F(CommentLexerTest, HTML7) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML8) {
|
||||
TEST_F(CommentLexerTest, HTML12) {
|
||||
const char *Source = "// <tag attr=@";
|
||||
|
||||
std::vector<Token> Toks;
|
||||
|
@ -980,7 +1075,7 @@ TEST_F(CommentLexerTest, HTML8) {
|
|||
ASSERT_EQ(tok::newline, Toks[5].getKind());
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML9) {
|
||||
TEST_F(CommentLexerTest, HTML13) {
|
||||
const char *Sources[] = {
|
||||
"// <tag attr=\"val\\\"\\'val",
|
||||
"// <tag attr=\"val\\\"\\'val\"",
|
||||
|
@ -1013,7 +1108,7 @@ TEST_F(CommentLexerTest, HTML9) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML10) {
|
||||
TEST_F(CommentLexerTest, HTML14) {
|
||||
const char *Sources[] = {
|
||||
"// <tag attr=\"val\\\"\\'val\">",
|
||||
"// <tag attr=\'val\\\"\\'val\'>"
|
||||
|
@ -1046,7 +1141,7 @@ TEST_F(CommentLexerTest, HTML10) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML11) {
|
||||
TEST_F(CommentLexerTest, HTML15) {
|
||||
const char *Source = "// </";
|
||||
|
||||
std::vector<Token> Toks;
|
||||
|
@ -1065,7 +1160,7 @@ TEST_F(CommentLexerTest, HTML11) {
|
|||
}
|
||||
|
||||
|
||||
TEST_F(CommentLexerTest, HTML12) {
|
||||
TEST_F(CommentLexerTest, HTML16) {
|
||||
const char *Source = "// </@";
|
||||
|
||||
std::vector<Token> Toks;
|
||||
|
@ -1086,7 +1181,7 @@ TEST_F(CommentLexerTest, HTML12) {
|
|||
ASSERT_EQ(tok::newline, Toks[3].getKind());
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML13) {
|
||||
TEST_F(CommentLexerTest, HTML17) {
|
||||
const char *Source = "// </tag";
|
||||
|
||||
std::vector<Token> Toks;
|
||||
|
@ -1104,7 +1199,7 @@ TEST_F(CommentLexerTest, HTML13) {
|
|||
ASSERT_EQ(tok::newline, Toks[2].getKind());
|
||||
}
|
||||
|
||||
TEST_F(CommentLexerTest, HTML14) {
|
||||
TEST_F(CommentLexerTest, HTML18) {
|
||||
const char *Sources[] = {
|
||||
"// </tag>",
|
||||
"// </ tag>",
|
||||
|
|
Loading…
Reference in New Issue