forked from OSchip/llvm-project
[llvm-rc] Have the tokenizer discard single-line & block comments.
This allows rc files to have comments. Eventually we should just use Clang's C preprocessor, but that is a somewhat larger effort for minimal gain, and this approach is straightforward. Differential Revision: https://reviews.llvm.org/D38651 llvm-svn: 315207
This commit is contained in:
parent
2a61a821a0
commit
bd3a9dbabb
|
@ -3,6 +3,14 @@ He11o LLVM
|
|||
|
||||
"RC string test.",L"Another RC string test.'&{",42,100
|
||||
|
||||
Block Comment Ident /*block /* // comment */ ifier
|
||||
|
||||
Line Comment // Identifier /*
|
||||
|
||||
/* Multi line
|
||||
block
|
||||
comment */
|
||||
|
||||
Multiple /* comments */ on /* a */ single // line
|
||||
|
||||
":))"
|
||||
|
|
|
@ -34,4 +34,13 @@
|
|||
; CHECK-NEXT: Int: 42; int value = 42
|
||||
; CHECK-NEXT: Comma: ,
|
||||
; CHECK-NEXT: Int: 100; int value = 100
|
||||
; CHECK-NEXT: Identifier: Block
|
||||
; CHECK-NEXT: Identifier: Comment
|
||||
; CHECK-NEXT: Identifier: Ident
|
||||
; CHECK-NEXT: Identifier: ifier
|
||||
; CHECK-NEXT: Identifier: Line
|
||||
; CHECK-NEXT: Identifier: Comment
|
||||
; CHECK-NEXT: Identifier: Multiple
|
||||
; CHECK-NEXT: Identifier: on
|
||||
; CHECK-NEXT: Identifier: single
|
||||
; CHECK-NEXT: String: ":))"
|
||||
|
|
|
@ -121,6 +121,17 @@ private:
|
|||
|
||||
bool canStartString() const;
|
||||
|
||||
// Check if tokenizer can start reading a single line comment (e.g. a comment
|
||||
// that begins with '//')
|
||||
bool canStartLineComment() const;
|
||||
|
||||
// Check if tokenizer can start reading a block comment (e.g. a
|
||||
// comment that begins with '/*' and ends with '*/')
|
||||
bool canStartBlockComment() const;
|
||||
|
||||
// Throw away all remaining characters on the current line.
|
||||
void skipCurrentLine();
|
||||
|
||||
bool streamEof() const;
|
||||
|
||||
// Classify the token that is about to be read from the current position.
|
||||
|
@ -134,6 +145,14 @@ private:
|
|||
size_t DataLength, Pos;
|
||||
};
|
||||
|
||||
void Tokenizer::skipCurrentLine() {
|
||||
Pos = Data.find_first_of("\r\n", Pos);
|
||||
Pos = Data.find_first_not_of("\r\n", Pos);
|
||||
|
||||
if (Pos == StringRef::npos)
|
||||
Pos = DataLength;
|
||||
}
|
||||
|
||||
Expected<std::vector<RCToken>> Tokenizer::run() {
|
||||
Pos = 0;
|
||||
std::vector<RCToken> Result;
|
||||
|
@ -154,6 +173,10 @@ Expected<std::vector<RCToken>> Tokenizer::run() {
|
|||
if (Error TokenError = consumeToken(TokenKind))
|
||||
return std::move(TokenError);
|
||||
|
||||
// Comments are just deleted, don't bother saving them.
|
||||
if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment)
|
||||
continue;
|
||||
|
||||
RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart));
|
||||
if (TokenKind == Kind::Identifier) {
|
||||
processIdentifier(Token);
|
||||
|
@ -195,6 +218,21 @@ Error Tokenizer::consumeToken(const Kind TokenKind) {
|
|||
advance();
|
||||
return Error::success();
|
||||
|
||||
case Kind::LineComment:
|
||||
advance(2);
|
||||
skipCurrentLine();
|
||||
return Error::success();
|
||||
|
||||
case Kind::StartComment: {
|
||||
advance(2);
|
||||
auto EndPos = Data.find("*/", Pos);
|
||||
if (EndPos == StringRef::npos)
|
||||
return getStringError(
|
||||
"Unclosed multi-line comment beginning at position " + Twine(Pos));
|
||||
advance(EndPos - Pos);
|
||||
advance(2);
|
||||
return Error::success();
|
||||
}
|
||||
case Kind::Identifier:
|
||||
while (!streamEof() && canContinueIdentifier())
|
||||
advance();
|
||||
|
@ -259,6 +297,16 @@ bool Tokenizer::canStartInt() const {
|
|||
return std::isdigit(Data[Pos]);
|
||||
}
|
||||
|
||||
bool Tokenizer::canStartBlockComment() const {
|
||||
assert(!streamEof());
|
||||
return Data.drop_front(Pos).startswith("/*");
|
||||
}
|
||||
|
||||
bool Tokenizer::canStartLineComment() const {
|
||||
assert(!streamEof());
|
||||
return Data.drop_front(Pos).startswith("//");
|
||||
}
|
||||
|
||||
bool Tokenizer::canContinueInt() const {
|
||||
assert(!streamEof());
|
||||
return std::isalnum(Data[Pos]);
|
||||
|
@ -271,6 +319,11 @@ bool Tokenizer::canStartString() const {
|
|||
bool Tokenizer::streamEof() const { return Pos == DataLength; }
|
||||
|
||||
Kind Tokenizer::classifyCurrentToken() const {
|
||||
if (canStartBlockComment())
|
||||
return Kind::StartComment;
|
||||
if (canStartLineComment())
|
||||
return Kind::LineComment;
|
||||
|
||||
if (canStartInt())
|
||||
return Kind::Int;
|
||||
if (canStartString())
|
||||
|
|
|
@ -18,6 +18,8 @@ TOKEN(Invalid) // Invalid token. Should not occur in a valid script.
|
|||
TOKEN(Int) // Integer (decimal, octal or hexadecimal).
|
||||
TOKEN(String) // String value.
|
||||
TOKEN(Identifier) // Script identifier (resource name or type).
|
||||
TOKEN(LineComment) // Beginning of single-line comment.
|
||||
TOKEN(StartComment) // Beginning of multi-line comment.
|
||||
|
||||
// Short tokens. They usually consist of exactly one character.
|
||||
// The definitions are of the form SHORT_TOKEN(TokenName, TokenChar).
|
||||
|
|
Loading…
Reference in New Issue