forked from OSchip/llvm-project
[PCH] Fixed preamble breaking with BOM presence (and particularly, fluctuating BOM presence)
This patch fixes broken preamble-skipping when the preamble region includes a byte order mark (BOM). Previously, parsing would fail if preamble PCH generation was enabled and a BOM was present. This also fixes preamble invalidation when a BOM appears or disappears. This may seem to be an obscure edge case, but it happens regularly with IDEs that pass buffer overrides that never (or always) have a BOM, yet the underlying file from the initial parse that generated a PCH might (or might not) have a BOM. I've included a test case for these scenarios. Differential Revision: https://reviews.llvm.org/D37491 llvm-svn: 313796
This commit is contained in:
parent
d95ed959d8
commit
84fd064ef9
|
@ -36,21 +36,6 @@ class CompilerInvocation;
|
||||||
class DeclGroupRef;
|
class DeclGroupRef;
|
||||||
class PCHContainerOperations;
|
class PCHContainerOperations;
|
||||||
|
|
||||||
/// A size of the preamble and a flag required by
|
|
||||||
/// PreprocessorOptions::PrecompiledPreambleBytes.
|
|
||||||
struct PreambleBounds {
|
|
||||||
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
|
|
||||||
: Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
|
|
||||||
|
|
||||||
/// \brief Size of the preamble in bytes.
|
|
||||||
unsigned Size;
|
|
||||||
/// \brief Whether the preamble ends at the start of a new line.
|
|
||||||
///
|
|
||||||
/// Used to inform the lexer as to whether it's starting at the beginning of
|
|
||||||
/// a line after skipping the preamble.
|
|
||||||
bool PreambleEndsAtStartOfLine;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// \brief Runs lexer to compute suggested preamble bounds.
|
/// \brief Runs lexer to compute suggested preamble bounds.
|
||||||
PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
|
PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
|
||||||
llvm::MemoryBuffer *Buffer,
|
llvm::MemoryBuffer *Buffer,
|
||||||
|
|
|
@ -39,6 +39,23 @@ enum ConflictMarkerKind {
|
||||||
CMK_Perforce
|
CMK_Perforce
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Describes the extent of the preamble (its size; no start offset is stored) and a flag required by
|
||||||
|
/// PreprocessorOptions::PrecompiledPreambleBytes.
|
||||||
|
/// The preamble, and therefore its size, includes the byte order mark (BOM), if any.
|
||||||
|
struct PreambleBounds {
|
||||||
|
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
|
||||||
|
: Size(Size),
|
||||||
|
PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
|
||||||
|
|
||||||
|
/// \brief Size of the preamble in bytes (including the BOM, if any).
|
||||||
|
unsigned Size;
|
||||||
|
/// \brief Whether the preamble ends at the start of a new line.
|
||||||
|
///
|
||||||
|
/// Used to inform the lexer as to whether it's starting at the beginning of
|
||||||
|
/// a line after skipping the preamble.
|
||||||
|
bool PreambleEndsAtStartOfLine;
|
||||||
|
};
|
||||||
|
|
||||||
/// Lexer - This provides a simple interface that turns a text buffer into a
|
/// Lexer - This provides a simple interface that turns a text buffer into a
|
||||||
/// stream of tokens. This provides no support for file reading or buffering,
|
/// stream of tokens. This provides no support for file reading or buffering,
|
||||||
/// or buffering/seeking of tokens, only forward lexing is supported. It relies
|
/// or buffering/seeking of tokens, only forward lexing is supported. It relies
|
||||||
|
@ -445,7 +462,7 @@ public:
|
||||||
/// \returns The offset into the file where the preamble ends and the rest
|
/// \returns The offset into the file where the preamble ends and the rest
|
||||||
/// of the file begins along with a boolean value indicating whether
|
/// of the file begins along with a boolean value indicating whether
|
||||||
/// the preamble ends at the beginning of a new line.
|
/// the preamble ends at the beginning of a new line.
|
||||||
static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
|
static PreambleBounds ComputePreamble(StringRef Buffer,
|
||||||
const LangOptions &LangOpts,
|
const LangOptions &LangOpts,
|
||||||
unsigned MaxLines = 0);
|
unsigned MaxLines = 0);
|
||||||
|
|
||||||
|
@ -618,7 +635,7 @@ private:
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
// Other lexer functions.
|
// Other lexer functions.
|
||||||
|
|
||||||
void SkipBytes(unsigned Bytes, bool StartOfLine);
|
void SetByteOffset(unsigned Offset, bool StartOfLine);
|
||||||
|
|
||||||
void PropagateLineStartLeadingSpaceInfo(Token &Result);
|
void PropagateLineStartLeadingSpaceInfo(Token &Result);
|
||||||
|
|
||||||
|
|
|
@ -160,7 +160,7 @@ public:
|
||||||
DisablePCHValidation(false),
|
DisablePCHValidation(false),
|
||||||
AllowPCHWithCompilerErrors(false),
|
AllowPCHWithCompilerErrors(false),
|
||||||
DumpDeserializedPCHDecls(false),
|
DumpDeserializedPCHDecls(false),
|
||||||
PrecompiledPreambleBytes(0, true),
|
PrecompiledPreambleBytes(0, false),
|
||||||
GeneratePreamble(false),
|
GeneratePreamble(false),
|
||||||
RemappedFilesKeepOriginalName(true),
|
RemappedFilesKeepOriginalName(true),
|
||||||
RetainRemappedFileBuffers(false),
|
RetainRemappedFileBuffers(false),
|
||||||
|
@ -195,7 +195,7 @@ public:
|
||||||
LexEditorPlaceholders = true;
|
LexEditorPlaceholders = true;
|
||||||
RetainRemappedFileBuffers = true;
|
RetainRemappedFileBuffers = true;
|
||||||
PrecompiledPreambleBytes.first = 0;
|
PrecompiledPreambleBytes.first = 0;
|
||||||
PrecompiledPreambleBytes.second = 0;
|
PrecompiledPreambleBytes.second = false;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -591,7 +591,7 @@ void PrintPreambleAction::ExecuteAction() {
|
||||||
auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
|
auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
|
||||||
if (Buffer) {
|
if (Buffer) {
|
||||||
unsigned Preamble =
|
unsigned Preamble =
|
||||||
Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first;
|
Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).Size;
|
||||||
llvm::outs().write((*Buffer)->getBufferStart(), Preamble);
|
llvm::outs().write((*Buffer)->getBufferStart(), Preamble);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -195,8 +195,7 @@ template <class T> bool moveOnNoError(llvm::ErrorOr<T> Val, T &Output) {
|
||||||
PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
|
PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
|
||||||
llvm::MemoryBuffer *Buffer,
|
llvm::MemoryBuffer *Buffer,
|
||||||
unsigned MaxLines) {
|
unsigned MaxLines) {
|
||||||
auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
|
return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
|
||||||
return PreambleBounds(Pre.first, Pre.second);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
|
llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
|
||||||
|
|
|
@ -552,7 +552,7 @@ namespace {
|
||||||
|
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
|
PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
|
||||||
const LangOptions &LangOpts,
|
const LangOptions &LangOpts,
|
||||||
unsigned MaxLines) {
|
unsigned MaxLines) {
|
||||||
// Create a lexer starting at the beginning of the file. Note that we use a
|
// Create a lexer starting at the beginning of the file. Note that we use a
|
||||||
|
@ -688,7 +688,7 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
|
||||||
else
|
else
|
||||||
End = TheTok.getLocation();
|
End = TheTok.getLocation();
|
||||||
|
|
||||||
return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
|
return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
|
||||||
TheTok.isAtStartOfLine());
|
TheTok.isAtStartOfLine());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1394,9 +1394,9 @@ Slash:
|
||||||
// Helper methods for lexing.
|
// Helper methods for lexing.
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
/// \brief Routine that indiscriminately skips bytes in the source file.
|
/// \brief Routine that indiscriminately sets the offset into the source file.
|
||||||
void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
|
void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
|
||||||
BufferPtr += Bytes;
|
BufferPtr = BufferStart + Offset;
|
||||||
if (BufferPtr > BufferEnd)
|
if (BufferPtr > BufferEnd)
|
||||||
BufferPtr = BufferEnd;
|
BufferPtr = BufferEnd;
|
||||||
// FIXME: What exactly does the StartOfLine bit mean? There are two
|
// FIXME: What exactly does the StartOfLine bit mean? There are two
|
||||||
|
|
|
@ -516,7 +516,7 @@ void Preprocessor::EnterMainSourceFile() {
|
||||||
// If we've been asked to skip bytes in the main file (e.g., as part of a
|
// If we've been asked to skip bytes in the main file (e.g., as part of a
|
||||||
// precompiled preamble), do so now.
|
// precompiled preamble), do so now.
|
||||||
if (SkipMainFilePreamble.first > 0)
|
if (SkipMainFilePreamble.first > 0)
|
||||||
CurLexer->SkipBytes(SkipMainFilePreamble.first,
|
CurLexer->SetByteOffset(SkipMainFilePreamble.first,
|
||||||
SkipMainFilePreamble.second);
|
SkipMainFilePreamble.second);
|
||||||
|
|
||||||
// Tell the header info that the main file was entered. If the file is later
|
// Tell the header info that the main file was entered. If the file is later
|
||||||
|
|
|
@ -153,4 +153,48 @@ TEST_F(PCHPreambleTest, ReparseWithOverriddenFileDoesNotInvalidatePreamble) {
|
||||||
ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2));
|
ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses a main file that starts with a BOM, then reparses it three times (unchanged,
// with the BOM removed, with the BOM restored), expecting no error diagnostics each time.
TEST_F(PCHPreambleTest, ParseWithBom) {
|
||||||
|
std::string Header = "//./header.h";
|
||||||
|
std::string Main = "//./main.cpp";
|
||||||
|
AddFile(Header, "int random() { return 4; }");
|
||||||
|
AddFile(Main,
|
||||||
|
"\xef\xbb\xbf" // UTF-8 byte order mark (BOM)
|
||||||
|
"#include \"//./header.h\"\n"
|
||||||
|
"int main() { return random() -2; }");
|
||||||
|
|
||||||
|
std::unique_ptr<ASTUnit> AST(ParseAST(Main));
|
||||||
|
ASSERT_TRUE(AST.get());
|
||||||
|
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||||
|
|
||||||
|
unsigned HeaderReadCount = GetFileReadCount(Header);
|
||||||
|
|
||||||
|
ASSERT_TRUE(ReparseAST(AST));
|
||||||
|
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||||
|
|
||||||
|
// The header must not have been read again: the preamble PCH was really reused.
|
||||||
|
ASSERT_EQ(HeaderReadCount, GetFileReadCount(Header));
|
||||||
|
|
||||||
|
// Remove the BOM by remapping the main file to the same contents without it.
|
||||||
|
RemapFile(Main,
|
||||||
|
"#include \"//./header.h\"\n"
|
||||||
|
"int main() { return random() -2; }");
|
||||||
|
|
||||||
|
ASSERT_TRUE(ReparseAST(AST));
|
||||||
|
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||||
|
|
||||||
|
ASSERT_LE(HeaderReadCount, GetFileReadCount(Header)); // only a lower bound; NOTE(review): presumably the preamble may be rebuilt here - confirm
|
||||||
|
HeaderReadCount = GetFileReadCount(Header); // new baseline for the next reparse
|
||||||
|
|
||||||
|
// Add the BOM back by remapping the main file to the original contents.
|
||||||
|
RemapFile(Main,
|
||||||
|
"\xef\xbb\xbf" // UTF-8 byte order mark (BOM)
|
||||||
|
"#include \"//./header.h\"\n"
|
||||||
|
"int main() { return random() -2; }");
|
||||||
|
|
||||||
|
ASSERT_TRUE(ReparseAST(AST));
|
||||||
|
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||||
|
|
||||||
|
ASSERT_LE(HeaderReadCount, GetFileReadCount(Header)); // only a lower bound, as after the BOM removal above
|
||||||
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
Loading…
Reference in New Issue