diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 9dca2fd5e1b5..d38f0f7ac82e 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -51,6 +51,9 @@ def err_empty_character : Error<"empty character constant">; def err_unterminated_block_comment : Error<"unterminated /* comment">; def err_invalid_character_to_charify : Error< "invalid argument to convert to character">; + +def err_conflict_marker : Error<"version control conflict marker in file">; + def ext_multichar_character_literal : ExtWarn< "multi-character character constant">, InGroup<MultiChar>; def ext_four_char_character_literal : Extension< diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index eac197afd653..fc65b1fc5449 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -38,7 +38,8 @@ class Lexer : public PreprocessorLexer { const char *BufferEnd; // End of the buffer. SourceLocation FileLoc; // Location for start of file. LangOptions Features; // Features enabled by this language (cache). - bool Is_PragmaLexer; // True if lexer for _Pragma handling. + bool Is_PragmaLexer : 1; // True if lexer for _Pragma handling. + bool IsInConflictMarker : 1; // True if in a VCS conflict marker '<<<<<<<' //===--------------------------------------------------------------------===// // Context-specific lexing flags set by the preprocessor. @@ -369,6 +370,9 @@ private: bool SkipBCPLComment (Token &Result, const char *CurPtr); bool SkipBlockComment (Token &Result, const char *CurPtr); bool SaveBCPLComment (Token &Result, const char *CurPtr); + + bool IsStartOfConflictMarker(const char *CurPtr); + bool HandleEndOfConflictMarker(const char *CurPtr); }; diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index dc8609d4efc6..a91e40435cb0 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -70,6 +70,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, " to simplify lexing!"); Is_PragmaLexer = false; + IsInConflictMarker = false; // Start of the file is a start of line. IsAtStartOfLine = true; @@ -1390,6 +1391,105 @@ unsigned Lexer::isNextPPTokenLParen() { return Tok.is(tok::l_paren); } +/// FindConflictEnd - Find the end of a version control conflict marker. +static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) { + llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7); + size_t Pos = RestOfBuffer.find(">>>>>>>"); + while (Pos != llvm::StringRef::npos) { + // Must occur at start of line. + if (RestOfBuffer[Pos-1] != '\r' && + RestOfBuffer[Pos-1] != '\n') { + RestOfBuffer = RestOfBuffer.substr(Pos+7); + continue; + } + return RestOfBuffer.data()+Pos; + } + return 0; +} + +/// IsStartOfConflictMarker - If the specified pointer is the start of a version +/// control conflict marker like '<<<<<<<', recognize it as such, emit an error +/// and recover nicely. This returns true if it is a conflict marker and false +/// if not. +bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { + // Only a conflict marker if it starts at the beginning of a line. + if (CurPtr != BufferStart && + CurPtr[-1] != '\n' && CurPtr[-1] != '\r') + return false; + + // Check to see if we have <<<<<<<. + if (BufferEnd-CurPtr < 8 || + llvm::StringRef(CurPtr, 7) != "<<<<<<<") + return false; + + // If we have a situation where we don't care about conflict markers, ignore + // it. + if (IsInConflictMarker || isLexingRawMode()) + return false; + + // Check to see if there is a >>>>>>> somewhere in the buffer at the start of + // a line to terminate this conflict marker. + if (FindConflictEnd(CurPtr+7, BufferEnd)) { + // We found a match. We are really in a conflict marker. + // Diagnose this, and ignore to the end of line. + Diag(CurPtr, diag::err_conflict_marker); + IsInConflictMarker = true; + + // Skip ahead to the end of line. We know this exists because the + // end-of-conflict marker starts with \r or \n. + while (*CurPtr != '\r' && *CurPtr != '\n') { + assert(CurPtr != BufferEnd && "Didn't find end of line"); + ++CurPtr; + } + BufferPtr = CurPtr; + return true; + } + + // No end of conflict marker found. + return false; +} + + +/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>' +/// marker, then it is the end of a conflict marker. Handle it by ignoring up +/// until the end of the line. This returns true if it is a conflict marker and +/// false if not. +bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { + // Only a conflict marker if it starts at the beginning of a line. + if (CurPtr != BufferStart && + CurPtr[-1] != '\n' && CurPtr[-1] != '\r') + return false; + + // If we have a situation where we don't care about conflict markers, ignore + // it. + if (!IsInConflictMarker || isLexingRawMode()) + return false; + + // Check to see if we have the marker (7 characters in a row). + for (unsigned i = 1; i != 7; ++i) + if (CurPtr[i] != CurPtr[0]) + return false; + + // If we do have it, search for the end of the conflict marker. This could + // fail if it got skipped with a '#if 0' or something. Note that CurPtr might + // be the end of conflict marker. + if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) { + CurPtr = End; + + // Skip ahead to the end of line. + while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n') + ++CurPtr; + + BufferPtr = CurPtr; + + // No longer in the conflict marker. + IsInConflictMarker = false; + return true; + } + + return false; +} + /// LexTokenInternal - This implements a simple C family lexer. It is an /// extremely performance critical piece of code. This assumes that the buffer @@ -1756,14 +1856,20 @@ LexNextToken: Char = getCharAndSize(CurPtr, SizeTmp); if (ParsingFilename) { return LexAngledStringLiteral(Result, CurPtr); - } else if (Char == '<' && - getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { - Kind = tok::lesslessequal; - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); } else if (Char == '<') { - CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - Kind = tok::lessless; + char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); + if (After == '=') { + Kind = tok::lesslessequal; + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) { + // If this is actually a '<<<<<<<' version control conflict marker, + // recognize it as such and recover nicely. + goto LexNextToken; + } else { + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Kind = tok::lessless; + } } else if (Char == '=') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::lessequal; @@ -1782,14 +1888,20 @@ LexNextToken: if (Char == '=') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::greaterequal; - } else if (Char == '>' && - getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { - CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), - SizeTmp2, Result); - Kind = tok::greatergreaterequal; } else if (Char == '>') { - CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - Kind = tok::greatergreater; + char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); + if (After == '=') { + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + Kind = tok::greatergreaterequal; + } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { + // If this is '>>>>>>>' and we're in a conflict marker, ignore it. + goto LexNextToken; + } else { + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Kind = tok::greatergreater; + } + } else { Kind = tok::greater; } @@ -1809,6 +1921,9 @@ LexNextToken: Kind = tok::pipeequal; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else if (Char == '|') { + // If this is '|||||||' and we're in a conflict marker, ignore it. + if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1)) + goto LexNextToken; Kind = tok::pipepipe; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else { @@ -1833,6 +1948,10 @@ LexNextToken: case '=': Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '=') { + // If this is '=======' and we're in a conflict marker, ignore it. + if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) + goto LexNextToken; + Kind = tok::equalequal; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else { diff --git a/clang/test/Lexer/conflict-marker.c b/clang/test/Lexer/conflict-marker.c new file mode 100644 index 000000000000..be2672b033b0 --- /dev/null +++ b/clang/test/Lexer/conflict-marker.c @@ -0,0 +1,29 @@ +// RUN: clang-cc %s -verify -fsyntax-only + +// Test that we recover gracefully from conflict markers left in input files. +// PR5238 + +// diff3 style +<<<<<<< .mine // expected-error {{version control conflict marker in file}} +int x = 4; +||||||| +int x = 123; +======= +float x = 17; +>>>>>>> .r91107 + +// normal style. +<<<<<<< .mine // expected-error {{version control conflict marker in file}} +typedef int y; +======= +typedef struct foo *y; +>>>>>>> .r91107 + +; +y b; + +int foo() { + y a = x; + return x + a; +} +