teach clang to recover gracefully from conflict markers left in source

files: PR5238.

llvm-svn: 91270
This commit is contained in:
Chris Lattner 2009-12-14 06:16:57 +00:00
parent 67dfd4236a
commit 7c027ee4c2
4 changed files with 170 additions and 15 deletions

View File

@ -51,6 +51,9 @@ def err_empty_character : Error<"empty character constant">;
def err_unterminated_block_comment : Error<"unterminated /* comment">;
def err_invalid_character_to_charify : Error<
"invalid argument to convert to character">;
def err_conflict_marker : Error<"version control conflict marker in file">;
def ext_multichar_character_literal : ExtWarn<
"multi-character character constant">, InGroup<MultiChar>;
def ext_four_char_character_literal : Extension<

View File

@ -38,7 +38,8 @@ class Lexer : public PreprocessorLexer {
const char *BufferEnd; // End of the buffer.
SourceLocation FileLoc; // Location for start of file.
LangOptions Features; // Features enabled by this language (cache).
bool Is_PragmaLexer; // True if lexer for _Pragma handling.
bool Is_PragmaLexer : 1; // True if lexer for _Pragma handling.
bool IsInConflictMarker : 1; // True if in a VCS conflict marker '<<<<<<<'
//===--------------------------------------------------------------------===//
// Context-specific lexing flags set by the preprocessor.
@ -369,6 +370,9 @@ private:
bool SkipBCPLComment (Token &Result, const char *CurPtr);
bool SkipBlockComment (Token &Result, const char *CurPtr);
bool SaveBCPLComment (Token &Result, const char *CurPtr);
bool IsStartOfConflictMarker(const char *CurPtr);
bool HandleEndOfConflictMarker(const char *CurPtr);
};

View File

@ -70,6 +70,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
" to simplify lexing!");
Is_PragmaLexer = false;
IsInConflictMarker = false;
// Start of the file is a start of line.
IsAtStartOfLine = true;
@ -1390,6 +1391,105 @@ unsigned Lexer::isNextPPTokenLParen() {
return Tok.is(tok::l_paren);
}
/// FindConflictEnd - Find the end of a version control conflict marker.
static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {
llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7);
size_t Pos = RestOfBuffer.find(">>>>>>>");
while (Pos != llvm::StringRef::npos) {
// Must occur at start of line.
if (RestOfBuffer[Pos-1] != '\r' &&
RestOfBuffer[Pos-1] != '\n') {
RestOfBuffer = RestOfBuffer.substr(Pos+7);
continue;
}
return RestOfBuffer.data()+Pos;
}
return 0;
}
/// IsStartOfConflictMarker - If the specified pointer is the start of a version
/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
/// and recover nicely. This returns true if it is a conflict marker and false
/// if not.
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.
if (CurPtr != BufferStart &&
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;
// Check to see if we have <<<<<<<.
if (BufferEnd-CurPtr < 8 ||
llvm::StringRef(CurPtr, 7) != "<<<<<<<")
return false;
// If we have a situation where we don't care about conflict markers, ignore
// it.
if (IsInConflictMarker || isLexingRawMode())
return false;
// Check to see if there is a >>>>>>> somewhere in the buffer at the start of
// a line to terminate this conflict marker.
if (FindConflictEnd(CurPtr+7, BufferEnd)) {
// We found a match. We are really in a conflict marker.
// Diagnose this, and ignore to the end of line.
Diag(CurPtr, diag::err_conflict_marker);
IsInConflictMarker = true;
// Skip ahead to the end of line. We know this exists because the
// end-of-conflict marker starts with \r or \n.
while (*CurPtr != '\r' && *CurPtr != '\n') {
assert(CurPtr != BufferEnd && "Didn't find end of line");
++CurPtr;
}
BufferPtr = CurPtr;
return true;
}
// No end of conflict marker found.
return false;
}
/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'
/// marker, then it is the end of a conflict marker. Handle it by ignoring up
/// until the end of the line. This returns true if it is a conflict marker and
/// false if not.
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.
if (CurPtr != BufferStart &&
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;
// If we have a situation where we don't care about conflict markers, ignore
// it.
if (!IsInConflictMarker || isLexingRawMode())
return false;
// Check to see if we have the marker (7 characters in a row).
for (unsigned i = 1; i != 7; ++i)
if (CurPtr[i] != CurPtr[0])
return false;
// If we do have it, search for the end of the conflict marker. This could
// fail if it got skipped with a '#if 0' or something. Note that CurPtr might
// be the end of conflict marker.
if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {
CurPtr = End;
// Skip ahead to the end of line.
while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
++CurPtr;
BufferPtr = CurPtr;
// No longer in the conflict marker.
IsInConflictMarker = false;
return true;
}
return false;
}
/// LexTokenInternal - This implements a simple C family lexer. It is an
/// extremely performance critical piece of code. This assumes that the buffer
@ -1756,14 +1856,20 @@ LexNextToken:
Char = getCharAndSize(CurPtr, SizeTmp);
if (ParsingFilename) {
return LexAngledStringLiteral(Result, CurPtr);
} else if (Char == '<' &&
getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
Kind = tok::lesslessequal;
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
} else if (Char == '<') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessless;
char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
if (After == '=') {
Kind = tok::lesslessequal;
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
} else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
// If this is actually a '<<<<<<<' version control conflict marker,
// recognize it as such and recover nicely.
goto LexNextToken;
} else {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessless;
}
} else if (Char == '=') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessequal;
@ -1782,14 +1888,20 @@ LexNextToken:
if (Char == '=') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::greaterequal;
} else if (Char == '>' &&
getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
Kind = tok::greatergreaterequal;
} else if (Char == '>') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::greatergreater;
char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
if (After == '=') {
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
Kind = tok::greatergreaterequal;
} else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
// If this is '>>>>>>>' and we're in a conflict marker, ignore it.
goto LexNextToken;
} else {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::greatergreater;
}
} else {
Kind = tok::greater;
}
@ -1809,6 +1921,9 @@ LexNextToken:
Kind = tok::pipeequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else if (Char == '|') {
// If this is '|||||||' and we're in a conflict marker, ignore it.
if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
goto LexNextToken;
Kind = tok::pipepipe;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {
@ -1833,6 +1948,10 @@ LexNextToken:
case '=':
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '=') {
// If this is '=======' and we're in a conflict marker, ignore it.
if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
goto LexNextToken;
Kind = tok::equalequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {

View File

@ -0,0 +1,29 @@
// RUN: clang-cc %s -verify -fsyntax-only
// Test that we recover gracefully from conflict markers left in input files.
// PR5238
// diff3 style
<<<<<<< .mine // expected-error {{version control conflict marker in file}}
int x = 4;
|||||||
int x = 123;
=======
float x = 17;
>>>>>>> .r91107
// normal style.
<<<<<<< .mine // expected-error {{version control conflict marker in file}}
typedef int y;
=======
typedef struct foo *y;
>>>>>>> .r91107
;
y b;
int foo() {
y a = x;
return x + a;
}