teach clang to recover gracefully from conflict markers left in source

files: PR5238. llvm-svn: 91270
2009-12-14 06:16:57 +00:00 · 2009-12-14 06:16:57 +00:00 · 7c027ee4c2
parent 67dfd4236a
commit 7c027ee4c2
4 changed files with 170 additions and 15 deletions
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@ -51,6 +51,9 @@ def err_empty_character : Error<"empty character constant">;
 def err_unterminated_block_comment : Error<"unterminated /* comment">;
 def err_invalid_character_to_charify : Error<
  "invalid argument to convert to character">;
+
+def err_conflict_marker : Error<"version control conflict marker in file">;
+
 def ext_multichar_character_literal : ExtWarn<
  "multi-character character constant">, InGroup<MultiChar>;
 def ext_four_char_character_literal : Extension<
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@ -38,7 +38,8 @@ class Lexer : public PreprocessorLexer {
  const char *BufferEnd;         // End of the buffer.
  SourceLocation FileLoc;        // Location for start of file.
  LangOptions Features;          // Features enabled by this language (cache).
-  bool Is_PragmaLexer;           // True if lexer for _Pragma handling.
+  bool Is_PragmaLexer : 1;       // True if lexer for _Pragma handling.
+  bool IsInConflictMarker : 1;   // True if in a VCS conflict marker '<<<<<<<'
  
  //===--------------------------------------------------------------------===//
  // Context-specific lexing flags set by the preprocessor.
@ -369,6 +370,9 @@ private:
  bool SkipBCPLComment       (Token &Result, const char *CurPtr);
  bool SkipBlockComment      (Token &Result, const char *CurPtr);
  bool SaveBCPLComment       (Token &Result, const char *CurPtr);
+  
+  bool IsStartOfConflictMarker(const char *CurPtr);
+  bool HandleEndOfConflictMarker(const char *CurPtr);
 };


--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@ -70,6 +70,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
         " to simplify lexing!");

  Is_PragmaLexer = false;
+  IsInConflictMarker = false;
  
  // Start of the file is a start of line.
  IsAtStartOfLine = true;
@ -1390,6 +1391,105 @@ unsigned Lexer::isNextPPTokenLParen() {
  return Tok.is(tok::l_paren);
 }

+/// FindConflictEnd - Find the end of a version control conflict marker.
+static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {
+  llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7);
+  size_t Pos = RestOfBuffer.find(">>>>>>>");
+  while (Pos != llvm::StringRef::npos) {
+    // Must occur at start of line.
+    if (RestOfBuffer[Pos-1] != '\r' &&
+        RestOfBuffer[Pos-1] != '\n') {
+      RestOfBuffer = RestOfBuffer.substr(Pos+7);
+      continue;
+    }
+    return RestOfBuffer.data()+Pos;
+  }
+  return 0;
+}
+
+/// IsStartOfConflictMarker - If the specified pointer is the start of a version
+/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
+/// and recover nicely.  This returns true if it is a conflict marker and false
+/// if not.
+bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
+  // Only a conflict marker if it starts at the beginning of a line.
+  if (CurPtr != BufferStart &&
+      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+    return false;
+  
+  // Check to see if we have <<<<<<<.
+  if (BufferEnd-CurPtr < 8 ||
+      llvm::StringRef(CurPtr, 7) != "<<<<<<<")
+    return false;
+
+  // If we have a situation where we don't care about conflict markers, ignore
+  // it.
+  if (IsInConflictMarker || isLexingRawMode())
+    return false;
+  
+  // Check to see if there is a >>>>>>> somewhere in the buffer at the start of
+  // a line to terminate this conflict marker.
+  if (FindConflictEnd(CurPtr+7, BufferEnd)) {
+    // We found a match.  We are really in a conflict marker.
+    // Diagnose this, and ignore to the end of line.
+    Diag(CurPtr, diag::err_conflict_marker);
+    IsInConflictMarker = true;
+    
+    // Skip ahead to the end of line.  We know this exists because the
+    // end-of-conflict marker starts with \r or \n.
+    while (*CurPtr != '\r' && *CurPtr != '\n') {
+      assert(CurPtr != BufferEnd && "Didn't find end of line");
+      ++CurPtr;
+    }
+    BufferPtr = CurPtr;
+    return true;
+  }
+  
+  // No end of conflict marker found.
+  return false;
+}
+
+
+/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'
+/// marker, then it is the end of a conflict marker.  Handle it by ignoring up
+/// until the end of the line.  This returns true if it is a conflict marker and
+/// false if not.
+bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
+  // Only a conflict marker if it starts at the beginning of a line.
+  if (CurPtr != BufferStart &&
+      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+    return false;
+  
+  // If we have a situation where we don't care about conflict markers, ignore
+  // it.
+  if (!IsInConflictMarker || isLexingRawMode())
+    return false;
+  
+  // Check to see if we have the marker (7 characters in a row).
+  for (unsigned i = 1; i != 7; ++i)
+    if (CurPtr[i] != CurPtr[0])
+      return false;
+  
+  // If we do have it, search for the end of the conflict marker.  This could
+  // fail if it got skipped with a '#if 0' or something.  Note that CurPtr might
+  // be the end of conflict marker.
+  if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {
+    CurPtr = End;
+    
+    // Skip ahead to the end of line.
+    while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
+      ++CurPtr;
+    
+    BufferPtr = CurPtr;
+    
+    // No longer in the conflict marker.
+    IsInConflictMarker = false;
+    return true;
+  }
+  
+  return false;
+}
+

 /// LexTokenInternal - This implements a simple C family lexer.  It is an
 /// extremely performance critical piece of code.  This assumes that the buffer
@ -1756,14 +1856,20 @@ LexNextToken:
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (ParsingFilename) {
      return LexAngledStringLiteral(Result, CurPtr);
-    } else if (Char == '<' &&
-               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
-      Kind = tok::lesslessequal;
-      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                           SizeTmp2, Result);
    } else if (Char == '<') {
-      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-      Kind = tok::lessless;
+      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      if (After == '=') {
+        Kind = tok::lesslessequal;
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
+      } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
+        // If this is actually a '<<<<<<<' version control conflict marker,
+        // recognize it as such and recover nicely.
+        goto LexNextToken;
+      } else {
+        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+        Kind = tok::lessless;
+      }
    } else if (Char == '=') {
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
      Kind = tok::lessequal;
@ -1782,14 +1888,20 @@ LexNextToken:
    if (Char == '=') {
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
      Kind = tok::greaterequal;
-    } else if (Char == '>' &&
-               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
-      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
-                           SizeTmp2, Result);
-      Kind = tok::greatergreaterequal;
    } else if (Char == '>') {
-      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-      Kind = tok::greatergreater;
+      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      if (After == '=') {
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
+        Kind = tok::greatergreaterequal;
+      } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
+        // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
+        goto LexNextToken;
+      } else {
+        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+        Kind = tok::greatergreater;
+      }
+      
    } else {
      Kind = tok::greater;
    }
@ -1809,6 +1921,9 @@ LexNextToken:
      Kind = tok::pipeequal;
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '|') {
+      // If this is '|||||||' and we're in a conflict marker, ignore it.
+      if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
+        goto LexNextToken;
      Kind = tok::pipepipe;
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
@ -1833,6 +1948,10 @@ LexNextToken:
  case '=':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '=') {
+      // If this is '=======' and we're in a conflict marker, ignore it.
+      if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
+        goto LexNextToken;
+      
      Kind = tok::equalequal;
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
--- a/clang/test/Lexer/conflict-marker.c
+++ b/clang/test/Lexer/conflict-marker.c
@ -0,0 +1,29 @@
+// RUN: clang-cc %s -verify -fsyntax-only
+
+// Test that we recover gracefully from conflict markers left in input files.
+// PR5238
+
+// diff3 style
+<<<<<<< .mine             // expected-error {{version control conflict marker in file}}
+int x = 4;
+|||||||
+int x = 123;
+=======
+float x = 17;
+>>>>>>> .r91107
+
+// normal style.
+<<<<<<< .mine             // expected-error {{version control conflict marker in file}}
+typedef int y;
+=======
+typedef struct foo *y;
+>>>>>>> .r91107
+
+;
+y b;
+
+int foo() {
+  y a = x;
+  return x + a;
+}
+