Implement resolving of HTML character references (named, e.g. &amp;; decimal, e.g. &#42;; hex) during comment parsing. Now the internal representation of plain text in the comment AST does not contain character references, but the characters themselves.

llvm-svn: 160891
parent ce675c52ba
commit 4586df765e
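Note (editorial, not part of the commit): a minimal usage sketch of the updated comments::Lexer interface. The constructor now takes a llvm::BumpPtrAllocator so that strings produced by resolving character references (for example "&amp;" becoming "&") can be allocated and returned as token text. The helper function name and the termination check are assumptions for illustration; only the constructor signature and lex() mirror the patch below.

#include "clang/AST/CommentLexer.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"

// Hypothetical helper, for illustration only.
void lexDocCommentSketch(clang::SourceLocation Begin, llvm::StringRef RawText) {
  llvm::BumpPtrAllocator Allocator; // owns strings built for resolved references
  clang::comments::Lexer L(Allocator, Begin, clang::comments::CommentOptions(),
                           RawText.begin(), RawText.end());
  clang::comments::Token Tok;
  do {
    L.lex(Tok); // "&amp;" now comes back as a plain-text token "&"
  } while (Tok.getKind() != clang::comments::tok::eof); // assumes eof ends the stream
}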
@@ -211,6 +211,10 @@ private:
  Lexer(const Lexer&); // DO NOT IMPLEMENT
  void operator=(const Lexer&); // DO NOT IMPLEMENT

+  /// Allocator for strings that are semantic values of tokens and have to be
+  /// computed (for example, resolved decimal character references).
+  llvm::BumpPtrAllocator &Allocator;
+
  const char *const BufferStart;
  const char *const BufferEnd;
  SourceLocation FileLoc;
@@ -289,6 +293,16 @@ private:

  bool isVerbatimLineCommand(StringRef Name) const;

+  /// Given a character reference name (e.g., "lt"), return the character that
+  /// it stands for (e.g., "<").
+  StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
+
+  /// Given a Unicode codepoint as base-10 integer, return the character.
+  StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
+
+  /// Given a Unicode codepoint as base-16 integer, return the character.
+  StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
+
  void formTokenWithChars(Token &Result, const char *TokEnd,
                          tok::TokenKind Kind) {
    const unsigned TokLen = TokEnd - BufferPtr;
@@ -302,6 +316,12 @@ private:
    BufferPtr = TokEnd;
  }

+  void formTextToken(Token &Result, const char *TokEnd) {
+    StringRef Text(BufferPtr, TokEnd - BufferPtr);
+    formTokenWithChars(Result, TokEnd, tok::text);
+    Result.setText(Text);
+  }
+
  SourceLocation getSourceLocation(const char *Loc) const {
    assert(Loc >= BufferStart && Loc <= BufferEnd &&
           "Location out of range for this buffer!");
@@ -328,6 +348,8 @@ private:

  void lexVerbatimLineText(Token &T);

+  void lexHTMLCharacterReference(Token &T);
+
  void setupAndLexHTMLStartTag(Token &T);

  void lexHTMLStartTag(Token &T);
@@ -337,7 +359,8 @@ private:
  void lexHTMLEndTag(Token &T);

public:
-  Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
+  Lexer(llvm::BumpPtrAllocator &Allocator,
+        SourceLocation FileLoc, const CommentOptions &CommOpts,
        const char *BufferStart, const char *BufferEnd);

  void lex(Token &T);

@@ -216,7 +216,8 @@ comments::FullComment *ASTContext::getCommentForDecl(const Decl *D) const {
    return NULL;

  const StringRef RawText = RC->getRawText(SourceMgr);
-  comments::Lexer L(RC->getSourceRange().getBegin(), comments::CommentOptions(),
+  comments::Lexer L(getAllocator(),
+                    RC->getSourceRange().getBegin(), comments::CommentOptions(),
                    RawText.begin(), RawText.end());

  comments::Sema S(getAllocator(), getSourceManager(), getDiagnostics());

@@ -1,4 +1,5 @@
#include "clang/AST/CommentLexer.h"
+#include "clang/Basic/ConvertUTF.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"

@@ -87,6 +88,71 @@ bool Lexer::isVerbatimLineCommand(StringRef Name) const {
  return false;
}

+namespace {
+bool isHTMLNamedCharacterReferenceCharacter(char C) {
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z');
+}
+
+bool isHTMLDecimalCharacterReferenceCharacter(char C) {
+  return C >= '0' && C <= '9';
+}
+
+bool isHTMLHexCharacterReferenceCharacter(char C) {
+  return (C >= '0' && C <= '9') ||
+         (C >= 'a' && C <= 'f') ||
+         (C >= 'A' && C <= 'F');
+}
+} // unnamed namespace
+
+StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
+  return llvm::StringSwitch<StringRef>(Name)
+      .Case("amp", "&")
+      .Case("lt", "<")
+      .Case("gt", ">")
+      .Case("quot", "\"")
+      .Case("apos", "\'")
+      .Default("");
+}
+
+StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
+  unsigned CodePoint = 0;
+  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+    assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
+    CodePoint *= 10;
+    CodePoint += Name[i] - '0';
+  }
+
+  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+  char *ResolvedPtr = Resolved;
+  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
+    return StringRef(Resolved, ResolvedPtr - Resolved);
+  else
+    return StringRef();
+}
+
+StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
+  unsigned CodePoint = 0;
+  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+    CodePoint *= 16;
+    const char C = Name[i];
+    assert(isHTMLHexCharacterReferenceCharacter(C));
+    if (C >= '0' && C <= '9')
+      CodePoint += Name[i] - '0';
+    else if (C >= 'a' && C <= 'f')
+      CodePoint += Name[i] - 'a' + 10;
+    else
+      CodePoint += Name[i] - 'A' + 10;
+  }
+
+  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+  char *ResolvedPtr = Resolved;
+  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
+    return StringRef(Resolved, ResolvedPtr - Resolved);
+  else
+    return StringRef();
+}
+
void Lexer::skipLineStartingDecorations() {
  // This function should be called only for C comments
  assert(CommentState == LCS_InsideCComment);
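Note (editorial, not part of the commit): the two numeric resolvers above delegate the actual encoding step to ConvertCodePointToUTF8 from clang/Basic/ConvertUTF.h. As a rough standalone illustration of what that step produces, here is a sketch of UTF-8 encoding for a single code point; it is not the clang implementation and skips surrogate validation beyond the basic range checks.

#include <cstdint>
#include <string>

// Illustrative only: encode a Unicode code point as UTF-8 bytes.
std::string encodeCodePointAsUTF8(uint32_t CP) {
  std::string Out;
  if (CP <= 0x7F) {
    Out += static_cast<char>(CP);
  } else if (CP <= 0x7FF) {
    Out += static_cast<char>(0xC0 | (CP >> 6));
    Out += static_cast<char>(0x80 | (CP & 0x3F));
  } else if (CP <= 0xFFFF) {
    Out += static_cast<char>(0xE0 | (CP >> 12));
    Out += static_cast<char>(0x80 | ((CP >> 6) & 0x3F));
    Out += static_cast<char>(0x80 | (CP & 0x3F));
  } else if (CP <= 0x10FFFF) {
    Out += static_cast<char>(0xF0 | (CP >> 18));
    Out += static_cast<char>(0x80 | ((CP >> 12) & 0x3F));
    Out += static_cast<char>(0x80 | ((CP >> 6) & 0x3F));
    Out += static_cast<char>(0x80 | (CP & 0x3F));
  }
  return Out; // e.g. 0x2A -> "*", 0xAB -> "\xC2\xAB" ("«")
}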
@@ -147,6 +213,33 @@ const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  return BufferPtr;
}

+const char *skipNamedCharacterReference(const char *BufferPtr,
+                                        const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+const char *skipDecimalCharacterReference(const char *BufferPtr,
+                                          const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+const char *skipHexCharacterReference(const char *BufferPtr,
+                                      const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
bool isHTMLIdentifierStartingCharacter(char C) {
  return (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
@@ -295,9 +388,7 @@ void Lexer::lexCommentText(Token &T) {
      case '@': {
        TokenPtr++;
        if (TokenPtr == CommentEnd) {
-          StringRef Text(BufferPtr, TokenPtr - BufferPtr);
-          formTokenWithChars(T, TokenPtr, tok::text);
-          T.setText(Text);
+          formTextToken(T, TokenPtr);
          return;
        }
        char C = *TokenPtr;
@@ -322,9 +413,7 @@ void Lexer::lexCommentText(Token &T) {

        // Don't make zero-length commands.
        if (!isCommandNameCharacter(*TokenPtr)) {
-          StringRef Text(BufferPtr, TokenPtr - BufferPtr);
-          formTokenWithChars(T, TokenPtr, tok::text);
-          T.setText(Text);
+          formTextToken(T, TokenPtr);
          return;
        }

@@ -357,12 +446,14 @@ void Lexer::lexCommentText(Token &T) {
        return;
      }

+      case '&':
+        lexHTMLCharacterReference(T);
+        return;
+
      case '<': {
        TokenPtr++;
        if (TokenPtr == CommentEnd) {
-          StringRef Text(BufferPtr, TokenPtr - BufferPtr);
-          formTokenWithChars(T, TokenPtr, tok::text);
-          T.setText(Text);
+          formTextToken(T, TokenPtr);
          return;
        }
        const char C = *TokenPtr;
@@ -370,11 +461,9 @@ void Lexer::lexCommentText(Token &T) {
          setupAndLexHTMLStartTag(T);
        else if (C == '/')
          setupAndLexHTMLEndTag(T);
-        else {
-          StringRef Text(BufferPtr, TokenPtr - BufferPtr);
-          formTokenWithChars(T, TokenPtr, tok::text);
-          T.setText(Text);
-        }
+        else
+          formTextToken(T, TokenPtr);
+
        return;
      }

@@ -394,12 +483,10 @@ void Lexer::lexCommentText(Token &T) {
          break;
        const char C = *TokenPtr;
        if(C == '\n' || C == '\r' ||
-           C == '\\' || C == '@' || C == '<')
+           C == '\\' || C == '@' || C == '&' || C == '<')
          break;
      }
-      StringRef Text(BufferPtr, TokenPtr - BufferPtr);
-      formTokenWithChars(T, TokenPtr, tok::text);
-      T.setText(Text);
+      formTextToken(T, TokenPtr);
      return;
    }
  }
@@ -506,6 +593,69 @@ void Lexer::lexVerbatimLineText(Token &T) {
  State = LS_Normal;
}

+void Lexer::lexHTMLCharacterReference(Token &T) {
+  const char *TokenPtr = BufferPtr;
+  assert(*TokenPtr == '&');
+  TokenPtr++;
+  if (TokenPtr == CommentEnd) {
+    formTextToken(T, TokenPtr);
+    return;
+  }
+  const char *NamePtr;
+  bool isNamed = false;
+  bool isDecimal = false;
+  char C = *TokenPtr;
+  if (isHTMLNamedCharacterReferenceCharacter(C)) {
+    NamePtr = TokenPtr;
+    TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
+    isNamed = true;
+  } else if (C == '#') {
+    TokenPtr++;
+    if (TokenPtr == CommentEnd) {
+      formTextToken(T, TokenPtr);
+      return;
+    }
+    C = *TokenPtr;
+    if (isHTMLDecimalCharacterReferenceCharacter(C)) {
+      NamePtr = TokenPtr;
+      TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
+      isDecimal = true;
+    } else if (C == 'x' || C == 'X') {
+      TokenPtr++;
+      NamePtr = TokenPtr;
+      TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
+    } else {
+      formTextToken(T, TokenPtr);
+      return;
+    }
+  } else {
+    formTextToken(T, TokenPtr);
+    return;
+  }
+  if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
+      *TokenPtr != ';') {
+    formTextToken(T, TokenPtr);
+    return;
+  }
+  StringRef Name(NamePtr, TokenPtr - NamePtr);
+  TokenPtr++; // Skip semicolon.
+  StringRef Resolved;
+  if (isNamed)
+    Resolved = resolveHTMLNamedCharacterReference(Name);
+  else if (isDecimal)
+    Resolved = resolveHTMLDecimalCharacterReference(Name);
+  else
+    Resolved = resolveHTMLHexCharacterReference(Name);
+
+  if (Resolved.empty()) {
+    formTextToken(T, TokenPtr);
+    return;
+  }
+  formTokenWithChars(T, TokenPtr, tok::text);
+  T.setText(Resolved);
+  return;
+}
+
void Lexer::setupAndLexHTMLStartTag(Token &T) {
  assert(BufferPtr[0] == '<' &&
         isHTMLIdentifierStartingCharacter(BufferPtr[1]));
@@ -561,11 +711,9 @@ void Lexer::lexHTMLStartTag(Token &T) {
    if (TokenPtr != CommentEnd && *TokenPtr == '>') {
      TokenPtr++;
      formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
-    } else {
-      StringRef Text(BufferPtr, TokenPtr - BufferPtr);
-      formTokenWithChars(T, TokenPtr, tok::text);
-      T.setText(Text);
-    }
+    } else
+      formTextToken(T, TokenPtr);
+
    State = LS_Normal;
    return;
  }
@@ -609,8 +757,10 @@ void Lexer::lexHTMLEndTag(Token &T) {
  State = LS_Normal;
}

-Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
+Lexer::Lexer(llvm::BumpPtrAllocator &Allocator,
+             SourceLocation FileLoc, const CommentOptions &CommOpts,
             const char *BufferStart, const char *BufferEnd):
+    Allocator(Allocator),
    BufferStart(BufferStart), BufferEnd(BufferEnd),
    FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
    CommentState(LCS_BeforeComment), State(LS_Normal) {

@@ -134,7 +134,13 @@ const char *RawComment::extractBriefText(const ASTContext &Context) const {
  // Make sure that RawText is valid.
  getRawText(Context.getSourceManager());

-  comments::Lexer L(Range.getBegin(), comments::CommentOptions(),
+  // Since we will be copying the resulting text, all allocations made during
+  // parsing are garbage after resulting string is formed. Thus we can use
+  // a separate allocator for all temporary stuff.
+  llvm::BumpPtrAllocator Allocator;
+
+  comments::Lexer L(Allocator,
+                    Range.getBegin(), comments::CommentOptions(),
                    RawText.begin(), RawText.end());
  comments::BriefParser P(L);

@@ -323,6 +323,9 @@ void comment_to_html_conversion_23();
/// &amp; &lt; &gt; &quot;
void comment_to_html_conversion_24();

+/// <em>0&lt;i</em>
+void comment_to_html_conversion_25();
+
#endif

// RUN: rm -rf %t
@@ -642,9 +645,26 @@ void comment_to_html_conversion_24();
// CHECK-NEXT: (CXComment_Text Text=[.])
// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
// CHECK-NEXT: (CXComment_Text Text=[::])))]
-// CHECK: annotate-comments.cpp:324:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[<p class="para-brief"> &amp;amp; &amp;lt; &amp;gt; &amp;quot;</p>]
-// CHECK: CommentAST=[
-// CHECK: (CXComment_FullComment
-// CHECK: (CXComment_Paragraph
-// CHECK: (CXComment_Text Text=[ &amp; &lt; &gt; &quot;])))]
+// CHECK: annotate-comments.cpp:324:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[<p class="para-brief"> &amp; &lt; &gt; &quot;</p>]
+// CHECK-NEXT: CommentAST=[
+// CHECK-NEXT: (CXComment_FullComment
+// CHECK-NEXT: (CXComment_Paragraph
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=[&])
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=[<])
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=[>])
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=["])))]
+// CHECK: annotate-comments.cpp:327:6: FunctionDecl=comment_to_html_conversion_25:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <em>0&lt;i</em></p>]
+// CHECK-NEXT: CommentAST=[
+// CHECK-NEXT: (CXComment_FullComment
+// CHECK-NEXT: (CXComment_Paragraph
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_HTMLStartTag Name=[em])
+// CHECK-NEXT: (CXComment_Text Text=[0])
+// CHECK-NEXT: (CXComment_Text Text=[<])
+// CHECK-NEXT: (CXComment_Text Text=[i])
+// CHECK-NEXT: (CXComment_HTMLEndTag Name=[em])))]

@@ -37,6 +37,7 @@ protected:
  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
  DiagnosticsEngine Diags;
  SourceManager SourceMgr;
+  llvm::BumpPtrAllocator Allocator;

  void lexString(const char *Source, std::vector<Token> &Toks);
};
@@ -47,7 +48,7 @@ void CommentLexerTest::lexString(const char *Source,
  FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
  SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);

-  comments::Lexer L(Begin, CommentOptions(),
+  comments::Lexer L(Allocator, Begin, CommentOptions(),
                    Source, Source + strlen(Source));

  while (1) {
@@ -1272,6 +1273,324 @@ TEST_F(CommentLexerTest, HTML20) {
  }
}

+TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
+  const char *Source = "// &";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
+  const char *Source = "// &!";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("!"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
+  const char *Source = "// &amp";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
+  const char *Source = "// &amp!";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("!"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
+  const char *Source = "// &#";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
+  const char *Source = "// &#a";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("a"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
+  const char *Source = "// &#42";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
+  const char *Source = "// &#42a";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("a"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
+  const char *Source = "// &#x";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
+  const char *Source = "// &#xz";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("z"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
+  const char *Source = "// &#xab";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
+  const char *Source = "// &#xabz";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("z"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
+  const char *Source = "// &amp;";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
+  const char *Source = "// &amp;&lt;";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("<"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
+  const char *Source = "// &amp; meow";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
+  const char *Sources[] = {
+    "// &#61;",
+    "// &#x3d;",
+    "// &#x3D;"
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(3U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::text, Toks[1].getKind());
+    ASSERT_EQ(StringRef("="), Toks[1].getText());
+
+    ASSERT_EQ(tok::newline, Toks[2].getKind());
+  }
+}
+
TEST_F(CommentLexerTest, MultipleComments) {
  const char *Source =
    "// Aaa\n"

@@ -54,7 +54,7 @@ FullComment *CommentParserTest::parseString(const char *Source) {
  FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
  SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);

-  comments::Lexer L(Begin, CommentOptions(),
+  comments::Lexer L(Allocator, Begin, CommentOptions(),
                    Source, Source + strlen(Source));

  comments::Sema S(Allocator, SourceMgr, Diags);