forked from OSchip/llvm-project
[PCH] Fixed preamble breaking with BOM presence (and particularly, fluctuating BOM presence)
This patch fixes broken preamble-skipping when the preamble region includes a byte order mark (BOM). Previously, parsing would fail if preamble PCH generation was enabled and a BOM was present. This also fixes preamble invalidation when a BOM appears or disappears. This may seem to be an obscure edge case, but it happens regularly with IDEs that pass buffer overrides that never (or always) have a BOM, yet the underlying file from the initial parse that generated a PCH might (or might not) have a BOM. I've included a test case for these scenarios. Differential Revision: https://reviews.llvm.org/D37491 llvm-svn: 313796
This commit is contained in:
parent
d95ed959d8
commit
84fd064ef9
|
@ -36,21 +36,6 @@ class CompilerInvocation;
|
|||
class DeclGroupRef;
|
||||
class PCHContainerOperations;
|
||||
|
||||
/// A size of the preamble and a flag required by
|
||||
/// PreprocessorOptions::PrecompiledPreambleBytes.
|
||||
struct PreambleBounds {
|
||||
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
|
||||
: Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
|
||||
|
||||
/// \brief Size of the preamble in bytes.
|
||||
unsigned Size;
|
||||
/// \brief Whether the preamble ends at the start of a new line.
|
||||
///
|
||||
/// Used to inform the lexer as to whether it's starting at the beginning of
|
||||
/// a line after skipping the preamble.
|
||||
bool PreambleEndsAtStartOfLine;
|
||||
};
|
||||
|
||||
/// \brief Runs lexer to compute suggested preamble bounds.
|
||||
PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
|
||||
llvm::MemoryBuffer *Buffer,
|
||||
|
|
|
@ -39,6 +39,23 @@ enum ConflictMarkerKind {
|
|||
CMK_Perforce
|
||||
};
|
||||
|
||||
/// Describes the size of the preamble and a flag required by
|
||||
/// PreprocessorOptions::PrecompiledPreambleBytes.
|
||||
/// The preamble includes the BOM, if any.
|
||||
struct PreambleBounds {
|
||||
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
|
||||
: Size(Size),
|
||||
PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
|
||||
|
||||
/// \brief Size of the preamble in bytes.
|
||||
unsigned Size;
|
||||
/// \brief Whether the preamble ends at the start of a new line.
|
||||
///
|
||||
/// Used to inform the lexer as to whether it's starting at the beginning of
|
||||
/// a line after skipping the preamble.
|
||||
bool PreambleEndsAtStartOfLine;
|
||||
};
|
||||
|
||||
/// Lexer - This provides a simple interface that turns a text buffer into a
|
||||
/// stream of tokens. This provides no support for file reading or buffering,
|
||||
/// or buffering/seeking of tokens, only forward lexing is supported. It relies
|
||||
|
@ -445,7 +462,7 @@ public:
|
|||
/// \returns The offset into the file where the preamble ends and the rest
|
||||
/// of the file begins along with a boolean value indicating whether
|
||||
/// the preamble ends at the beginning of a new line.
|
||||
static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
|
||||
static PreambleBounds ComputePreamble(StringRef Buffer,
|
||||
const LangOptions &LangOpts,
|
||||
unsigned MaxLines = 0);
|
||||
|
||||
|
@ -618,7 +635,7 @@ private:
|
|||
//===--------------------------------------------------------------------===//
|
||||
// Other lexer functions.
|
||||
|
||||
void SkipBytes(unsigned Bytes, bool StartOfLine);
|
||||
void SetByteOffset(unsigned Offset, bool StartOfLine);
|
||||
|
||||
void PropagateLineStartLeadingSpaceInfo(Token &Result);
|
||||
|
||||
|
|
|
@ -160,7 +160,7 @@ public:
|
|||
DisablePCHValidation(false),
|
||||
AllowPCHWithCompilerErrors(false),
|
||||
DumpDeserializedPCHDecls(false),
|
||||
PrecompiledPreambleBytes(0, true),
|
||||
PrecompiledPreambleBytes(0, false),
|
||||
GeneratePreamble(false),
|
||||
RemappedFilesKeepOriginalName(true),
|
||||
RetainRemappedFileBuffers(false),
|
||||
|
@ -195,7 +195,7 @@ public:
|
|||
LexEditorPlaceholders = true;
|
||||
RetainRemappedFileBuffers = true;
|
||||
PrecompiledPreambleBytes.first = 0;
|
||||
PrecompiledPreambleBytes.second = 0;
|
||||
PrecompiledPreambleBytes.second = false;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -591,7 +591,7 @@ void PrintPreambleAction::ExecuteAction() {
|
|||
auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
|
||||
if (Buffer) {
|
||||
unsigned Preamble =
|
||||
Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first;
|
||||
Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).Size;
|
||||
llvm::outs().write((*Buffer)->getBufferStart(), Preamble);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -195,8 +195,7 @@ template <class T> bool moveOnNoError(llvm::ErrorOr<T> Val, T &Output) {
|
|||
PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
|
||||
llvm::MemoryBuffer *Buffer,
|
||||
unsigned MaxLines) {
|
||||
auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
|
||||
return PreambleBounds(Pre.first, Pre.second);
|
||||
return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
|
||||
}
|
||||
|
||||
llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
|
||||
|
|
|
@ -552,7 +552,7 @@ namespace {
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
|
||||
PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
|
||||
const LangOptions &LangOpts,
|
||||
unsigned MaxLines) {
|
||||
// Create a lexer starting at the beginning of the file. Note that we use a
|
||||
|
@ -688,7 +688,7 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
|
|||
else
|
||||
End = TheTok.getLocation();
|
||||
|
||||
return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
|
||||
return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
|
||||
TheTok.isAtStartOfLine());
|
||||
}
|
||||
|
||||
|
@ -1394,9 +1394,9 @@ Slash:
|
|||
// Helper methods for lexing.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// \brief Routine that indiscriminately skips bytes in the source file.
|
||||
void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
|
||||
BufferPtr += Bytes;
|
||||
/// \brief Routine that indiscriminately sets the offset into the source file.
|
||||
void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
|
||||
BufferPtr = BufferStart + Offset;
|
||||
if (BufferPtr > BufferEnd)
|
||||
BufferPtr = BufferEnd;
|
||||
// FIXME: What exactly does the StartOfLine bit mean? There are two
|
||||
|
|
|
@ -516,7 +516,7 @@ void Preprocessor::EnterMainSourceFile() {
|
|||
// If we've been asked to skip bytes in the main file (e.g., as part of a
|
||||
// precompiled preamble), do so now.
|
||||
if (SkipMainFilePreamble.first > 0)
|
||||
CurLexer->SkipBytes(SkipMainFilePreamble.first,
|
||||
CurLexer->SetByteOffset(SkipMainFilePreamble.first,
|
||||
SkipMainFilePreamble.second);
|
||||
|
||||
// Tell the header info that the main file was entered. If the file is later
|
||||
|
|
|
@ -153,4 +153,48 @@ TEST_F(PCHPreambleTest, ReparseWithOverriddenFileDoesNotInvalidatePreamble) {
|
|||
ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2));
|
||||
}
|
||||
|
||||
TEST_F(PCHPreambleTest, ParseWithBom) {
|
||||
std::string Header = "//./header.h";
|
||||
std::string Main = "//./main.cpp";
|
||||
AddFile(Header, "int random() { return 4; }");
|
||||
AddFile(Main,
|
||||
"\xef\xbb\xbf"
|
||||
"#include \"//./header.h\"\n"
|
||||
"int main() { return random() -2; }");
|
||||
|
||||
std::unique_ptr<ASTUnit> AST(ParseAST(Main));
|
||||
ASSERT_TRUE(AST.get());
|
||||
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||
|
||||
unsigned HeaderReadCount = GetFileReadCount(Header);
|
||||
|
||||
ASSERT_TRUE(ReparseAST(AST));
|
||||
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||
|
||||
// Check preamble PCH was really reused
|
||||
ASSERT_EQ(HeaderReadCount, GetFileReadCount(Header));
|
||||
|
||||
// Remove BOM
|
||||
RemapFile(Main,
|
||||
"#include \"//./header.h\"\n"
|
||||
"int main() { return random() -2; }");
|
||||
|
||||
ASSERT_TRUE(ReparseAST(AST));
|
||||
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||
|
||||
ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
|
||||
HeaderReadCount = GetFileReadCount(Header);
|
||||
|
||||
// Add BOM back
|
||||
RemapFile(Main,
|
||||
"\xef\xbb\xbf"
|
||||
"#include \"//./header.h\"\n"
|
||||
"int main() { return random() -2; }");
|
||||
|
||||
ASSERT_TRUE(ReparseAST(AST));
|
||||
ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
|
||||
|
||||
ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
|
Loading…
Reference in New Issue