forked from OSchip/llvm-project
teach clang to recover gracefully from conflict markers left in source
files: PR5238. llvm-svn: 91270
This commit is contained in:
parent
67dfd4236a
commit
7c027ee4c2
|
@ -51,6 +51,9 @@ def err_empty_character : Error<"empty character constant">;
|
|||
def err_unterminated_block_comment : Error<"unterminated /* comment">;
|
||||
def err_invalid_character_to_charify : Error<
|
||||
"invalid argument to convert to character">;
|
||||
|
||||
def err_conflict_marker : Error<"version control conflict marker in file">;
|
||||
|
||||
def ext_multichar_character_literal : ExtWarn<
|
||||
"multi-character character constant">, InGroup<MultiChar>;
|
||||
def ext_four_char_character_literal : Extension<
|
||||
|
|
|
@ -38,7 +38,8 @@ class Lexer : public PreprocessorLexer {
|
|||
const char *BufferEnd; // End of the buffer.
|
||||
SourceLocation FileLoc; // Location for start of file.
|
||||
LangOptions Features; // Features enabled by this language (cache).
|
||||
bool Is_PragmaLexer; // True if lexer for _Pragma handling.
|
||||
bool Is_PragmaLexer : 1; // True if lexer for _Pragma handling.
|
||||
bool IsInConflictMarker : 1; // True if in a VCS conflict marker '<<<<<<<'
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Context-specific lexing flags set by the preprocessor.
|
||||
|
@ -369,6 +370,9 @@ private:
|
|||
bool SkipBCPLComment (Token &Result, const char *CurPtr);
|
||||
bool SkipBlockComment (Token &Result, const char *CurPtr);
|
||||
bool SaveBCPLComment (Token &Result, const char *CurPtr);
|
||||
|
||||
bool IsStartOfConflictMarker(const char *CurPtr);
|
||||
bool HandleEndOfConflictMarker(const char *CurPtr);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -70,6 +70,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
|
|||
" to simplify lexing!");
|
||||
|
||||
Is_PragmaLexer = false;
|
||||
IsInConflictMarker = false;
|
||||
|
||||
// Start of the file is a start of line.
|
||||
IsAtStartOfLine = true;
|
||||
|
@ -1390,6 +1391,105 @@ unsigned Lexer::isNextPPTokenLParen() {
|
|||
return Tok.is(tok::l_paren);
|
||||
}
|
||||
|
||||
/// FindConflictEnd - Find the end of a version control conflict marker.
|
||||
static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {
|
||||
llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7);
|
||||
size_t Pos = RestOfBuffer.find(">>>>>>>");
|
||||
while (Pos != llvm::StringRef::npos) {
|
||||
// Must occur at start of line.
|
||||
if (RestOfBuffer[Pos-1] != '\r' &&
|
||||
RestOfBuffer[Pos-1] != '\n') {
|
||||
RestOfBuffer = RestOfBuffer.substr(Pos+7);
|
||||
continue;
|
||||
}
|
||||
return RestOfBuffer.data()+Pos;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// IsStartOfConflictMarker - If the specified pointer is the start of a version
|
||||
/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
|
||||
/// and recover nicely. This returns true if it is a conflict marker and false
|
||||
/// if not.
|
||||
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
|
||||
// Only a conflict marker if it starts at the beginning of a line.
|
||||
if (CurPtr != BufferStart &&
|
||||
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
|
||||
return false;
|
||||
|
||||
// Check to see if we have <<<<<<<.
|
||||
if (BufferEnd-CurPtr < 8 ||
|
||||
llvm::StringRef(CurPtr, 7) != "<<<<<<<")
|
||||
return false;
|
||||
|
||||
// If we have a situation where we don't care about conflict markers, ignore
|
||||
// it.
|
||||
if (IsInConflictMarker || isLexingRawMode())
|
||||
return false;
|
||||
|
||||
// Check to see if there is a >>>>>>> somewhere in the buffer at the start of
|
||||
// a line to terminate this conflict marker.
|
||||
if (FindConflictEnd(CurPtr+7, BufferEnd)) {
|
||||
// We found a match. We are really in a conflict marker.
|
||||
// Diagnose this, and ignore to the end of line.
|
||||
Diag(CurPtr, diag::err_conflict_marker);
|
||||
IsInConflictMarker = true;
|
||||
|
||||
// Skip ahead to the end of line. We know this exists because the
|
||||
// end-of-conflict marker starts with \r or \n.
|
||||
while (*CurPtr != '\r' && *CurPtr != '\n') {
|
||||
assert(CurPtr != BufferEnd && "Didn't find end of line");
|
||||
++CurPtr;
|
||||
}
|
||||
BufferPtr = CurPtr;
|
||||
return true;
|
||||
}
|
||||
|
||||
// No end of conflict marker found.
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'
|
||||
/// marker, then it is the end of a conflict marker. Handle it by ignoring up
|
||||
/// until the end of the line. This returns true if it is a conflict marker and
|
||||
/// false if not.
|
||||
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
|
||||
// Only a conflict marker if it starts at the beginning of a line.
|
||||
if (CurPtr != BufferStart &&
|
||||
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
|
||||
return false;
|
||||
|
||||
// If we have a situation where we don't care about conflict markers, ignore
|
||||
// it.
|
||||
if (!IsInConflictMarker || isLexingRawMode())
|
||||
return false;
|
||||
|
||||
// Check to see if we have the marker (7 characters in a row).
|
||||
for (unsigned i = 1; i != 7; ++i)
|
||||
if (CurPtr[i] != CurPtr[0])
|
||||
return false;
|
||||
|
||||
// If we do have it, search for the end of the conflict marker. This could
|
||||
// fail if it got skipped with a '#if 0' or something. Note that CurPtr might
|
||||
// be the end of conflict marker.
|
||||
if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {
|
||||
CurPtr = End;
|
||||
|
||||
// Skip ahead to the end of line.
|
||||
while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
|
||||
++CurPtr;
|
||||
|
||||
BufferPtr = CurPtr;
|
||||
|
||||
// No longer in the conflict marker.
|
||||
IsInConflictMarker = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// LexTokenInternal - This implements a simple C family lexer. It is an
|
||||
/// extremely performance critical piece of code. This assumes that the buffer
|
||||
|
@ -1756,14 +1856,20 @@ LexNextToken:
|
|||
Char = getCharAndSize(CurPtr, SizeTmp);
|
||||
if (ParsingFilename) {
|
||||
return LexAngledStringLiteral(Result, CurPtr);
|
||||
} else if (Char == '<' &&
|
||||
getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
|
||||
Kind = tok::lesslessequal;
|
||||
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||
SizeTmp2, Result);
|
||||
} else if (Char == '<') {
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
Kind = tok::lessless;
|
||||
char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
|
||||
if (After == '=') {
|
||||
Kind = tok::lesslessequal;
|
||||
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||
SizeTmp2, Result);
|
||||
} else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
|
||||
// If this is actually a '<<<<<<<' version control conflict marker,
|
||||
// recognize it as such and recover nicely.
|
||||
goto LexNextToken;
|
||||
} else {
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
Kind = tok::lessless;
|
||||
}
|
||||
} else if (Char == '=') {
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
Kind = tok::lessequal;
|
||||
|
@ -1782,14 +1888,20 @@ LexNextToken:
|
|||
if (Char == '=') {
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
Kind = tok::greaterequal;
|
||||
} else if (Char == '>' &&
|
||||
getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
|
||||
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||
SizeTmp2, Result);
|
||||
Kind = tok::greatergreaterequal;
|
||||
} else if (Char == '>') {
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
Kind = tok::greatergreater;
|
||||
char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
|
||||
if (After == '=') {
|
||||
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||
SizeTmp2, Result);
|
||||
Kind = tok::greatergreaterequal;
|
||||
} else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
|
||||
// If this is '>>>>>>>' and we're in a conflict marker, ignore it.
|
||||
goto LexNextToken;
|
||||
} else {
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
Kind = tok::greatergreater;
|
||||
}
|
||||
|
||||
} else {
|
||||
Kind = tok::greater;
|
||||
}
|
||||
|
@ -1809,6 +1921,9 @@ LexNextToken:
|
|||
Kind = tok::pipeequal;
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
} else if (Char == '|') {
|
||||
// If this is '|||||||' and we're in a conflict marker, ignore it.
|
||||
if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
|
||||
goto LexNextToken;
|
||||
Kind = tok::pipepipe;
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
} else {
|
||||
|
@ -1833,6 +1948,10 @@ LexNextToken:
|
|||
case '=':
|
||||
Char = getCharAndSize(CurPtr, SizeTmp);
|
||||
if (Char == '=') {
|
||||
// If this is '=======' and we're in a conflict marker, ignore it.
|
||||
if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
|
||||
goto LexNextToken;
|
||||
|
||||
Kind = tok::equalequal;
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
// RUN: clang-cc %s -verify -fsyntax-only
|
||||
|
||||
// Test that we recover gracefully from conflict markers left in input files.
|
||||
// PR5238
|
||||
|
||||
// diff3 style
|
||||
<<<<<<< .mine // expected-error {{version control conflict marker in file}}
|
||||
int x = 4;
|
||||
|||||||
|
||||
int x = 123;
|
||||
=======
|
||||
float x = 17;
|
||||
>>>>>>> .r91107
|
||||
|
||||
// normal style.
|
||||
<<<<<<< .mine // expected-error {{version control conflict marker in file}}
|
||||
typedef int y;
|
||||
=======
|
||||
typedef struct foo *y;
|
||||
>>>>>>> .r91107
|
||||
|
||||
;
|
||||
y b;
|
||||
|
||||
int foo() {
|
||||
y a = x;
|
||||
return x + a;
|
||||
}
|
||||
|
Loading…
Reference in New Issue