Bitcode: Introduce initial multi-module reader API.

Implement getLazyBitcodeModule() and parseBitcodeFile() in terms of it.

Differential Revision: https://reviews.llvm.org/D26719

llvm-svn: 287156
This commit is contained in:
Peter Collingbourne 2016-11-16 21:44:45 +00:00
parent a49c2019f1
commit 7a74803abf
4 changed files with 149 additions and 61 deletions

View File

@ -40,6 +40,44 @@ namespace llvm {
return std::move(*Val);
}
/// Represents a module in a bitcode file.
///
/// Instances are produced by getBitcodeModuleList(), which is the only
/// function allowed to call the private constructor. An instance records
/// where a module lives inside the bitcode bytes; the module itself is only
/// read when getLazyModule() or parseModule() is called.
class BitcodeModule {
// The bitcode bytes this module was found in.
// NOTE(review): this is a view, not a copy — presumably the underlying
// buffer must outlive this object; confirm against callers.
ArrayRef<uint8_t> Buffer;
// Identifier given to the Module created from this entry.
StringRef ModuleIdentifier;
// The bitstream location of the IDENTIFICATION_BLOCK.
// Holds -1ull when no identification block was recorded for this module.
uint64_t IdentificationBit;
// The bitstream location of this module's MODULE_BLOCK.
uint64_t ModuleBit;
// Private: stores the four values unchanged. Only getBitcodeModuleList()
// constructs instances.
BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
uint64_t IdentificationBit, uint64_t ModuleBit)
: Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
// Calls the ctor.
friend Expected<std::vector<BitcodeModule>>
getBitcodeModuleList(MemoryBufferRef Buffer);
// Shared implementation behind getLazyModule() and parseModule().
// MaterializeAll is true when everything should be materialized
// (parseModule() passes true, getLazyModule() passes false).
Expected<std::unique_ptr<Module>>
getModuleImpl(LLVMContext &Context, bool MaterializeAll,
bool ShouldLazyLoadMetadata);
public:
/// Read the bitcode module and prepare for lazy deserialization of function
/// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
/// Returns the new module, or an error if reading fails.
Expected<std::unique_ptr<Module>>
getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata);
/// Read the entire bitcode module and return it.
/// Returns the new module, or an error if reading fails.
Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
};
/// Returns a list of modules in the specified bitcode buffer.
Expected<std::vector<BitcodeModule>>
getBitcodeModuleList(MemoryBufferRef Buffer);
/// Read the header of the specified bitcode buffer and prepare for lazy
/// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
/// lazily load metadata as well.

View File

@ -607,7 +607,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
std::vector<std::string> BundleTags;
public:
BitcodeReader(BitstreamCursor Stream, LLVMContext &Context);
BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification,
LLVMContext &Context);
Error materializeForwardReferencedFunctions();
@ -841,9 +842,13 @@ std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx,
return std::error_code();
}
BitcodeReader::BitcodeReader(BitstreamCursor Stream, LLVMContext &Context)
: BitcodeReaderBase(std::move(Stream)), Context(Context), ValueList(Context),
MetadataList(Context) {}
/// Construct a reader over \p Stream, bound to \p Context.
///
/// \p Stream is moved into the BitcodeReaderBase subobject. \p Context is
/// stored and also used to construct ValueList and MetadataList.
/// \p ProducerIdentification is stored on the reader so the caller can supply
/// it up front.
BitcodeReader::BitcodeReader(BitstreamCursor Stream,
StringRef ProducerIdentification,
LLVMContext &Context)
: BitcodeReaderBase(std::move(Stream)), Context(Context),
ValueList(Context), MetadataList(Context) {
// The parameter shadows the member of the same name, hence the explicit
// this->. NOTE(review): the member's declaration is not visible here
// (presumably inherited), which would explain why it is assigned in the body
// rather than in the initializer list — confirm.
this->ProducerIdentification = ProducerIdentification;
}
Error BitcodeReader::materializeForwardReferencedFunctions() {
if (WillMaterializeAllForwardRefs)
@ -4365,36 +4370,7 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata) {
TheModule = M;
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (true) {
if (Stream.AtEndOfStream()) {
// We didn't really read a proper Module.
return error("Malformed IR file");
}
BitstreamEntry Entry =
Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
if (Entry.Kind != BitstreamEntry::SubBlock)
return error("Malformed block");
if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
Expected<std::string> ProducerIdentificationOrErr =
readIdentificationBlock(Stream);
if (!ProducerIdentificationOrErr)
return ProducerIdentificationOrErr.takeError();
ProducerIdentification = *ProducerIdentificationOrErr;
continue;
}
if (Entry.ID == bitc::MODULE_BLOCK_ID)
return parseModule(0, ShouldLazyLoadMetadata);
if (Stream.SkipBlock())
return error("Invalid record");
}
return parseModule(0, ShouldLazyLoadMetadata);
}
Error BitcodeReader::parseGlobalObjectAttachment(GlobalObject &GO,
@ -6566,26 +6542,76 @@ const std::error_category &llvm::BitcodeErrorCategory() {
// External interface
//===----------------------------------------------------------------------===//
/// Scan the top level of the bitcode in \p Buffer and return one
/// BitcodeModule per MODULE_BLOCK encountered.
///
/// Each entry remembers the cursor position reported for its MODULE_BLOCK and
/// for the most recently seen IDENTIFICATION_BLOCK (or -1ull if none has been
/// seen yet). Fails with "Malformed block" when the cursor reports an error or
/// a stray end-of-block, or when a block cannot be skipped; stream
/// initialization errors are forwarded unchanged.
Expected<std::vector<BitcodeModule>>
llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
  Expected<BitstreamCursor> CursorOrErr = initStream(Buffer);
  if (!CursorOrErr)
    return CursorOrErr.takeError();
  BitstreamCursor &Cursor = *CursorOrErr;

  // -1ull means "no identification block seen so far".
  uint64_t LastIdentificationBit = -1ull;
  std::vector<BitcodeModule> Found;

  for (;;) {
    // Some producers (e.g. Apple's ar tool) leave trailing garbage after the
    // bitcode. Once fewer bytes remain than another module could possibly
    // need, treat the scan as finished.
    if (Cursor.getCurrentByteNo() + 8 >= Cursor.getBitcodeBytes().size())
      return Found;

    BitstreamEntry Next = Cursor.advance();

    if (Next.Kind == BitstreamEntry::EndBlock ||
        Next.Kind == BitstreamEntry::Error)
      return error("Malformed block");

    if (Next.Kind == BitstreamEntry::Record) {
      // Top-level records are of no interest here.
      Cursor.skipRecord(Next.ID);
      continue;
    }

    if (Next.Kind == BitstreamEntry::SubBlock) {
      if (Next.ID == bitc::IDENTIFICATION_BLOCK_ID) {
        LastIdentificationBit = Cursor.GetCurrentBitNo();
      } else if (Next.ID == bitc::MODULE_BLOCK_ID) {
        // Brace-initialized here because the constructor is private and only
        // this (friend) function may call it.
        Found.push_back({Cursor.getBitcodeBytes(),
                         Buffer.getBufferIdentifier(), LastIdentificationBit,
                         Cursor.GetCurrentBitNo()});
      }

      // Every sub-block is skipped, whether it was recorded or not; its
      // contents are read later, on demand.
      if (Cursor.SkipBlock())
        return error("Malformed block");
      continue;
    }
  }
}
/// \brief Get a lazy one-at-a-time loading module from bitcode.
///
/// This isn't always used in a lazy context. In particular, it's also used by
/// \a parseBitcodeFile(). If this is truly lazy, then we need to eagerly pull
/// \a parseModule(). If this is truly lazy, then we need to eagerly pull
/// in forward-referenced functions from block address references.
///
/// \param[in] MaterializeAll Set to \c true if we should materialize
/// everything.
static Expected<std::unique_ptr<Module>>
getLazyBitcodeModuleImpl(MemoryBufferRef Buffer, LLVMContext &Context,
bool MaterializeAll,
bool ShouldLazyLoadMetadata = false) {
Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
if (!StreamOrErr)
return StreamOrErr.takeError();
Expected<std::unique_ptr<Module>>
BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
bool ShouldLazyLoadMetadata) {
BitstreamCursor Stream(Buffer);
BitcodeReader *R = new BitcodeReader(std::move(*StreamOrErr), Context);
std::string ProducerIdentification;
if (IdentificationBit != -1ull) {
Stream.JumpToBit(IdentificationBit);
Expected<std::string> ProducerIdentificationOrErr =
readIdentificationBlock(Stream);
if (!ProducerIdentificationOrErr)
return ProducerIdentificationOrErr.takeError();
ProducerIdentification = *ProducerIdentificationOrErr;
}
Stream.JumpToBit(ModuleBit);
auto *R =
new BitcodeReader(std::move(Stream), ProducerIdentification, Context);
std::unique_ptr<Module> M =
llvm::make_unique<Module>(Buffer.getBufferIdentifier(), Context);
llvm::make_unique<Module>(ModuleIdentifier, Context);
M->setMaterializer(R);
// Delay parsing Metadata if ShouldLazyLoadMetadata is true.
@ -6604,11 +6630,23 @@ getLazyBitcodeModuleImpl(MemoryBufferRef Buffer, LLVMContext &Context,
return std::move(M);
}
/// Create the module for lazy use: delegates to getModuleImpl() without
/// requesting full materialization, forwarding \p ShouldLazyLoadMetadata.
Expected<std::unique_ptr<Module>>
BitcodeModule::getLazyModule(LLVMContext &Context,
                             bool ShouldLazyLoadMetadata) {
  const bool MaterializeAll = false;
  return getModuleImpl(Context, MaterializeAll, ShouldLazyLoadMetadata);
}
Expected<std::unique_ptr<Module>>
llvm::getLazyBitcodeModule(MemoryBufferRef Buffer,
LLVMContext &Context, bool ShouldLazyLoadMetadata) {
return getLazyBitcodeModuleImpl(Buffer, Context, false,
ShouldLazyLoadMetadata);
Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer);
if (!MsOrErr)
return MsOrErr.takeError();
if (MsOrErr->size() != 1)
return error("Expected a single module");
return (*MsOrErr)[0].getLazyModule(Context, ShouldLazyLoadMetadata);
}
Expected<std::unique_ptr<Module>>
@ -6621,13 +6659,25 @@ llvm::getOwningLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
return MOrErr;
}
Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
LLVMContext &Context) {
return getLazyBitcodeModuleImpl(Buffer, Context, true);
/// Read the whole module eagerly: delegates to getModuleImpl() with full
/// materialization requested and lazy metadata loading turned off.
Expected<std::unique_ptr<Module>>
BitcodeModule::parseModule(LLVMContext &Context) {
  // TODO: Restore the use-lists to the in-memory state when the bitcode was
  // written.  We must defer until the Module has been fully materialized.
  const bool MaterializeAll = true;
  const bool ShouldLazyLoadMetadata = false;
  return getModuleImpl(Context, MaterializeAll, ShouldLazyLoadMetadata);
}
/// Fully parse the single module contained in \p Buffer.
///
/// Forwards any error from getBitcodeModuleList(), and fails with
/// "Expected a single module" unless the buffer holds exactly one module.
Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
                                                         LLVMContext &Context) {
  Expected<std::vector<BitcodeModule>> ModulesOrErr =
      getBitcodeModuleList(Buffer);
  if (!ModulesOrErr)
    return ModulesOrErr.takeError();

  std::vector<BitcodeModule> &Modules = *ModulesOrErr;
  if (Modules.size() != 1)
    return error("Expected a single module");

  return Modules.front().parseModule(Context);
}
Expected<std::string> llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) {
Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
if (!StreamOrErr)

View File

@ -31,11 +31,11 @@ RUN: FileCheck --check-prefix=NON-FUNCTION-EXPLICIT-INVOKE %s
INVALID-EMPTY: Invalid bitcode signature
INVALID-ENCODING: Invalid encoding
BAD-ABBREV: Abbreviation starts with an Array or a Blob
UNEXPECTED-EOF: Unexpected end of file
BAD-ABBREV-NUMBER: Invalid abbrev number
BAD-ABBREV: Malformed block
UNEXPECTED-EOF: Malformed block
BAD-ABBREV-NUMBER: Malformed block
BAD-TYPE-TABLE-FORWARD-REF: Invalid TYPE table: Only named structs can be forward referenced
BAD-BITWIDTH: Bitwidth for integer type out of range
BAD-BITWIDTH: Malformed block
BAD-ALIGN: Invalid alignment value
MISMATCHED-EXPLICIT-GEP: Explicit gep type does not match pointee type of pointer operand
MISMATCHED-EXPLICIT-LOAD: Explicit load/store type does not match pointee type of pointer operand
@ -69,7 +69,7 @@ INSERT-IDXS: INSERTVAL: Invalid type
RUN: not llvm-dis -disable-output %p/Inputs/invalid-no-proper-module.bc 2>&1 | \
RUN: FileCheck --check-prefix=NO-MODULE %s
NO-MODULE: Malformed IR file
NO-MODULE: Expected a single module
RUN: not llvm-dis -disable-output %p/Inputs/invalid-fp-shift.bc 2>&1 | \
RUN: FileCheck --check-prefix=FP-SHIFT %s
@ -105,7 +105,7 @@ FWDREF-TYPE: Invalid record
RUN: not llvm-dis -disable-output %p/Inputs/invalid-fwdref-type-mismatch-2.bc 2>&1 | \
RUN: FileCheck --check-prefix=FWDREF-TYPE-MISMATCH %s
FWDREF-TYPE-MISMATCH: Type mismatch in constant table!
FWDREF-TYPE-MISMATCH: Malformed block
RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-element-type.bc 2>&1 | \
RUN: FileCheck --check-prefix=ELEMENT-TYPE %s
@ -154,7 +154,7 @@ EXTRACT-0-IDXS: EXTRACTVAL: Invalid instruction with 0 indices
RUN: not llvm-dis -disable-output %p/Inputs/invalid-load-ptr-type.bc 2>&1 | \
RUN: FileCheck --check-prefix=BAD-LOAD-PTR-TYPE %s
BAD-LOAD-PTR-TYPE: Cannot load/store from pointer
BAD-LOAD-PTR-TYPE: Malformed block
RUN: not llvm-dis -disable-output %p/Inputs/invalid-inserted-value-type-mismatch.bc 2>&1 | \
RUN: FileCheck --check-prefix=INSERT-TYPE-MISMATCH %s
@ -174,7 +174,7 @@ INVALID-ARGUMENT-TYPE: Invalid function argument type
RUN: not llvm-dis -disable-output %p/Inputs/invalid-function-comdat-id.bc 2>&1 | \
RUN: FileCheck --check-prefix=INVALID-FCOMDAT-ID %s
INVALID-FCOMDAT-ID: Invalid function comdat ID
INVALID-FCOMDAT-ID: Malformed block
RUN: not llvm-dis -disable-output %p/Inputs/invalid-global-var-comdat-id.bc 2>&1 | \
RUN: FileCheck --check-prefix=INVALID-GVCOMDAT-ID %s
@ -189,12 +189,12 @@ ABBREV-NO-OPS: Abbrev record with no operands
RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-operand-encoding.bc 2>&1 | \
RUN: FileCheck --check-prefix=ARRAY-OP-ENC %s
ARRAY-OP-ENC: Array element type has to be an encoding of a type
ARRAY-OP-ENC: Malformed block
RUN: not llvm-dis -disable-output %p/Inputs/invalid-metadata-not-followed-named-node.bc 2>&1 | \
RUN: FileCheck --check-prefix=META-NOT-FOLLOWED-BY-NAMED-META %s
META-NOT-FOLLOWED-BY-NAMED-META: METADATA_NAME not followed by METADATA_NAMED_NODE
META-NOT-FOLLOWED-BY-NAMED-META: Malformed block
RUN: not llvm-dis -disable-output %p/Inputs/invalid-vector-length.bc 2>&1 | \
RUN: FileCheck --check-prefix=VECTOR-LENGTH %s
@ -214,7 +214,7 @@ NO-FUNCTION-BLOCK: Trying to materialize functions before seeing function blocks
RUN: not llvm-dis -disable-output %p/Inputs/invalid-name-with-0-byte.bc 2>&1 | \
RUN: FileCheck --check-prefix=NAME-WITH-0 %s
NAME-WITH-0: Invalid value name
NAME-WITH-0: Malformed block
RUN: not llvm-dis -disable-output %p/Inputs/invalid-void-constant.bc 2>&1 | \
RUN: FileCheck --check-prefix=VOID-CONSTANT-TYPE %s

View File

@ -1,4 +1,4 @@
; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s
; PR8494
; CHECK: Invalid record
; CHECK: Malformed block