[CodeView] Add support for content hashing CodeView type records.

Currently nothing uses this, but this at least gets the core
algorithm in, and adds some test to demonstrate correctness.

Differential Revision: https://reviews.llvm.org/D40736

llvm-svn: 319854
This commit is contained in:
Zachary Turner 2017-12-05 23:08:58 +00:00
parent 0328d0083e
commit 87b78e9d1e
8 changed files with 366 additions and 46 deletions

View File

@ -16,6 +16,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeHashing.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
@ -27,13 +28,6 @@ namespace llvm {
namespace codeview {
class ContinuationRecordBuilder;
class TypeHasher;
struct HashedType {
hash_code Hash;
ArrayRef<uint8_t> Data;
TypeIndex Index;
};
class MergingTypeTableBuilder : public TypeCollection {
/// Storage for records. These need to outlive the TypeTableBuilder.
@ -45,14 +39,11 @@ class MergingTypeTableBuilder : public TypeCollection {
SimpleTypeSerializer SimpleSerializer;
/// Hash table.
DenseSet<HashedType> HashedRecords;
DenseMap<LocallyHashedType, TypeIndex> HashedRecords;
/// Contains a list of all records indexed by TypeIndex.toArrayIndex().
SmallVector<ArrayRef<uint8_t>, 2> SeenRecords;
/// Contains a list of all hash codes index by TypeIndex.toArrayIndex().
SmallVector<hash_code, 2> SeenHashes;
public:
explicit MergingTypeTableBuilder(BumpPtrAllocator &Storage);
~MergingTypeTableBuilder();
@ -73,7 +64,6 @@ public:
BumpPtrAllocator &getAllocator() { return RecordStorage; }
ArrayRef<ArrayRef<uint8_t>> records() const;
ArrayRef<hash_code> hashes() const;
TypeIndex insertRecordAs(hash_code Hash, ArrayRef<uint8_t> &Record);
TypeIndex insertRecordBytes(ArrayRef<uint8_t> &Record);

View File

@ -0,0 +1,119 @@
//===- TypeHashing.h ---------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
namespace llvm {
namespace codeview {
/// A locally hashed type represents a straightforward hash code of a serialized
/// record. The record is simply serialized, and then the bytes are hashed by
/// a standard algorithm. This is sufficient for the case of de-duplicating
/// records within a single sequence of types, because if two records both have
/// a back-reference to the same type in the same stream, they will both have
/// the same numeric value for the TypeIndex of the back reference.
struct LocallyHashedType {
hash_code Hash;
ArrayRef<uint8_t> RecordData;
static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
};
/// A globally hashed type represents a hash value that is sufficient to
/// uniquely identify a record across multiple type streams or type sequences.
/// This works by, for any given record A which references B, replacing the
/// TypeIndex that refers to B with a previously-computed global hash for B. As
/// this is a recursive algorithm (e.g. the global hash of B also depends on the
/// global hashes of the types that B refers to), a global hash can uniquely
/// identify identify that A occurs in another stream that has a completely
/// different graph structure. Although the hash itself is slower to compute,
/// probing is much faster with a globally hashed type, because the hash itself
/// is considered "as good as" the original type. Since type records can be
/// quite large, this makes the equality comparison of the hash much faster than
/// equality comparison of a full record.
struct GloballyHashedType {
GloballyHashedType() = default;
GloballyHashedType(StringRef H)
: GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
GloballyHashedType(ArrayRef<uint8_t> H) {
assert(H.size() == 20);
::memcpy(Hash.data(), H.data(), 20);
}
std::array<uint8_t, 20> Hash;
/// Given a sequence of bytes representing a record, compute a global hash for
/// this record. Due to the nature of global hashes incorporating the hashes
/// of referenced records, this function requires a list of types and ids
/// that RecordData might reference, indexable by TypeIndex.
static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
ArrayRef<GloballyHashedType> PreviousTypes,
ArrayRef<GloballyHashedType> PreviousIds);
/// Given a sequence of combined type and ID records, compute global hashes
/// for each of them, returning the results in a vector of hashed types.
template <typename Range>
static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
std::vector<GloballyHashedType> Hashes;
Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
for (const auto &R : Records)
Hashes.push_back(hashType(R, Hashes, Hashes));
return Hashes;
}
};
} // namespace codeview
template <> struct DenseMapInfo<codeview::LocallyHashedType> {
static codeview::LocallyHashedType Empty;
static codeview::LocallyHashedType Tombstone;
static codeview::LocallyHashedType getEmptyKey() { return Empty; }
static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
static unsigned getHashValue(codeview::LocallyHashedType Val) {
return Val.Hash;
}
static bool isEqual(codeview::LocallyHashedType LHS,
codeview::LocallyHashedType RHS) {
if (LHS.Hash != RHS.Hash)
return false;
return LHS.RecordData == RHS.RecordData;
}
};
template <> struct DenseMapInfo<codeview::GloballyHashedType> {
static codeview::GloballyHashedType Empty;
static codeview::GloballyHashedType Tombstone;
static codeview::GloballyHashedType getEmptyKey() { return Empty; }
static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
static unsigned getHashValue(codeview::GloballyHashedType Val) {
return *reinterpret_cast<const unsigned *>(Val.Hash.data());
}
static bool isEqual(codeview::GloballyHashedType LHS,
codeview::GloballyHashedType RHS) {
return LHS.Hash == RHS.Hash;
}
};
} // namespace llvm
#endif

View File

@ -334,6 +334,11 @@ public:
uint32_t Attrs;
Optional<MemberPointerInfo> MemberInfo;
void setAttrs(PointerKind PK, PointerMode PM, PointerOptions PO,
uint8_t Size) {
Attrs = calcAttrs(PK, PM, PO, Size);
}
private:
static uint32_t calcAttrs(PointerKind PK, PointerMode PM, PointerOptions PO,
uint8_t Size) {

View File

@ -32,6 +32,7 @@ add_llvm_library(LLVMDebugInfoCodeView
TypeDumpVisitor.cpp
TypeIndex.cpp
TypeIndexDiscovery.cpp
TypeHashing.cpp
TypeRecordMapping.cpp
TypeStreamMerger.cpp
TypeTableCollection.cpp

View File

@ -28,27 +28,6 @@
using namespace llvm;
using namespace llvm::codeview;
static HashedType Empty{0, {}, TypeIndex::None()};
static HashedType Tombstone{hash_code(-1), {}, TypeIndex::None()};
namespace llvm {
template <> struct DenseMapInfo<HashedType> {
static inline HashedType getEmptyKey() { return Empty; }
static inline HashedType getTombstoneKey() { return Tombstone; }
static unsigned getHashValue(HashedType Val) { return Val.Hash; }
static bool isEqual(HashedType LHS, HashedType RHS) {
if (RHS.Hash != LHS.Hash)
return false;
return RHS.Data == LHS.Data;
}
};
} // end namespace llvm
TypeIndex MergingTypeTableBuilder::nextTypeIndex() const {
return TypeIndex::fromArrayIndex(SeenRecords.size());
}
@ -56,7 +35,6 @@ TypeIndex MergingTypeTableBuilder::nextTypeIndex() const {
MergingTypeTableBuilder::MergingTypeTableBuilder(BumpPtrAllocator &Storage)
: RecordStorage(Storage) {
SeenRecords.reserve(4096);
SeenHashes.reserve(4096);
}
MergingTypeTableBuilder::~MergingTypeTableBuilder() = default;
@ -102,13 +80,8 @@ ArrayRef<ArrayRef<uint8_t>> MergingTypeTableBuilder::records() const {
return SeenRecords;
}
ArrayRef<hash_code> MergingTypeTableBuilder::hashes() const {
return SeenHashes;
}
void MergingTypeTableBuilder::reset() {
HashedRecords.clear();
SeenHashes.clear();
SeenRecords.clear();
}
@ -124,18 +97,19 @@ TypeIndex MergingTypeTableBuilder::insertRecordAs(hash_code Hash,
assert(Record.size() < UINT32_MAX && "Record too big");
assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
HashedType TempHashedType = {Hash, Record, nextTypeIndex()};
auto Result = HashedRecords.insert(TempHashedType);
LocallyHashedType WeakHash{Hash, Record};
auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex());
if (Result.second) {
Result.first->Data = stabilize(RecordStorage, Record);
SeenRecords.push_back(Result.first->Data);
SeenHashes.push_back(Result.first->Hash);
ArrayRef<uint8_t> RecordData = stabilize(RecordStorage, Record);
Result.first->first.RecordData = RecordData;
SeenRecords.push_back(RecordData);
}
// Update the caller's copy of Record to point a stable copy.
Record = Result.first->Data;
return Result.first->Index;
TypeIndex ActualTI = Result.first->second;
Record = SeenRecords[ActualTI.toArrayIndex()];
return ActualTI;
}
TypeIndex

View File

@ -0,0 +1,74 @@
//===- TypeHashing.cpp -------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeHashing.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/Support/SHA1.h"
using namespace llvm;
using namespace llvm::codeview;
LocallyHashedType DenseMapInfo<LocallyHashedType>::Empty{0, {}};
LocallyHashedType DenseMapInfo<LocallyHashedType>::Tombstone{hash_code(-1), {}};
static std::array<uint8_t, 20> EmptyHash;
static std::array<uint8_t, 20> TombstoneHash = {
0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
GloballyHashedType DenseMapInfo<GloballyHashedType>::Empty{EmptyHash};
GloballyHashedType DenseMapInfo<GloballyHashedType>::Tombstone{TombstoneHash};
LocallyHashedType LocallyHashedType::hashType(ArrayRef<uint8_t> RecordData) {
return {llvm::hash_value(RecordData), RecordData};
}
GloballyHashedType
GloballyHashedType::hashType(ArrayRef<uint8_t> RecordData,
ArrayRef<GloballyHashedType> PreviousTypes,
ArrayRef<GloballyHashedType> PreviousIds) {
SmallVector<TiReference, 4> Refs;
discoverTypeIndices(RecordData, Refs);
SHA1 S;
S.init();
uint32_t Off = 0;
RecordData = RecordData.drop_front(sizeof(RecordPrefix));
for (const auto &Ref : Refs) {
// Hash any data that comes before this TiRef.
uint32_t PreLen = Ref.Offset - Off;
ArrayRef<uint8_t> PreData = RecordData.slice(Off, PreLen);
S.update(PreData);
auto Prev = (Ref.Kind == TiRefKind::IndexRef) ? PreviousIds : PreviousTypes;
auto RefData = RecordData.slice(Ref.Offset, Ref.Count * sizeof(TypeIndex));
// For each type index referenced, add in the previously computed hash
// value of that type.
ArrayRef<TypeIndex> Indices(
reinterpret_cast<const TypeIndex *>(RefData.data()), Ref.Count);
for (TypeIndex TI : Indices) {
ArrayRef<uint8_t> BytesToHash;
if (TI.isSimple() || TI.isNoneType()) {
const uint8_t *IndexBytes = reinterpret_cast<const uint8_t *>(&TI);
BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex));
} else {
BytesToHash = Prev[TI.toArrayIndex()].Hash;
}
S.update(BytesToHash);
}
Off = Ref.Offset + Ref.Count * sizeof(TypeIndex);
}
// Don't forget to add in any trailing bytes.
auto TrailingBytes = RecordData.drop_front(Off);
S.update(TrailingBytes);
return {S.final()};
}

View File

@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
set(DebugInfoCodeViewSources
RandomAccessVisitorTest.cpp
TypeHashingTest.cpp
TypeIndexDiscoveryTest.cpp
)

View File

@ -0,0 +1,156 @@
//===- llvm/unittest/DebugInfo/CodeView/TypeHashingTest.cpp ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeHashing.h"
#include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h"
#include "gtest/gtest.h"
using namespace llvm;
using namespace llvm::codeview;
static TypeIndex createPointerRecord(AppendingTypeTableBuilder &Builder,
TypeIndex TI) {
PointerRecord PR(TypeRecordKind::Pointer);
PR.setAttrs(PointerKind::Near32, PointerMode::Pointer, PointerOptions::None,
4);
PR.ReferentType = TI;
return Builder.writeLeafType(PR);
}
static TypeIndex createArgListRecord(AppendingTypeTableBuilder &Builder,
TypeIndex Q, TypeIndex R) {
ArgListRecord AR(TypeRecordKind::ArgList);
AR.ArgIndices.push_back(Q);
AR.ArgIndices.push_back(R);
return Builder.writeLeafType(AR);
}
static TypeIndex createProcedureRecord(AppendingTypeTableBuilder &Builder,
uint32_t ParamCount, TypeIndex Return,
TypeIndex ArgList) {
ProcedureRecord PR(TypeRecordKind::Procedure);
PR.ArgumentList = ArgList;
PR.CallConv = CallingConvention::NearC;
PR.Options = FunctionOptions::None;
PR.ParameterCount = ParamCount;
PR.ReturnType = Return;
return Builder.writeLeafType(PR);
}
static ArrayRef<uint8_t> hash_of(ArrayRef<GloballyHashedType> Hashes,
TypeIndex TI) {
return Hashes[TI.toArrayIndex()].Hash;
}
static void verifyHashUniqueness(ArrayRef<GloballyHashedType> Hashes) {
assert(!Hashes.empty());
for (size_t I = 0; I < Hashes.size() - 1; ++I) {
for (size_t J = I + 1; J < Hashes.size(); ++J) {
EXPECT_NE(Hashes[I].Hash, Hashes[J].Hash);
}
}
}
TEST(TypeHashingTest, ContentHash) {
SimpleTypeSerializer Serializer;
TypeIndex CharStar(SimpleTypeKind::SignedCharacter,
SimpleTypeMode::NearPointer32);
BumpPtrAllocator Alloc;
AppendingTypeTableBuilder Ordering1(Alloc);
AppendingTypeTableBuilder Ordering2(Alloc);
TypeIndex CharP(SimpleTypeKind::SignedCharacter, SimpleTypeMode::NearPointer);
TypeIndex IntP(SimpleTypeKind::Int32, SimpleTypeMode::NearPointer);
TypeIndex DoubleP(SimpleTypeKind::Float64, SimpleTypeMode::NearPointer);
// We're going to the same type sequence with two different orderings, and
// then confirm all records are hashed the same.
TypeIndex CharPP[2];
TypeIndex IntPP[2];
TypeIndex IntPPP[2];
TypeIndex DoublePP[2];
TypeIndex Args[2];
TypeIndex Proc[2];
// Ordering 1
// ----------------------------------------
// LF_POINTER 0x1000 {char**}
// Referent = char*
// LF_POINTER 0x1001 {int**}
// Referent = int*
// LF_POINTER 0x1002 {int***}
// Referent = 0x1001
// LF_ARGLIST 0x1003 {(char**, int***)}
// Arg[0] = 0x1000
// Arg[1] = 0x1002
// LF_PROCEDURE 0x1004 {int** func(char**, int***)}
// ArgList = 0x1003
// ReturnType = 0x1001
std::vector<GloballyHashedType> Ordering1Hashes;
CharPP[0] = createPointerRecord(Ordering1, CharP);
IntPP[0] = createPointerRecord(Ordering1, IntP);
IntPPP[0] = createPointerRecord(Ordering1, IntPP[0]);
Args[0] = createArgListRecord(Ordering1, CharPP[0], IntPPP[0]);
Proc[0] = createProcedureRecord(Ordering1, 2, IntPP[0], Args[0]);
ASSERT_EQ(0x1000U, CharPP[0].getIndex());
ASSERT_EQ(0x1001U, IntPP[0].getIndex());
ASSERT_EQ(0x1002U, IntPPP[0].getIndex());
ASSERT_EQ(0x1003U, Args[0].getIndex());
ASSERT_EQ(0x1004U, Proc[0].getIndex());
auto Hashes1 = GloballyHashedType::hashTypes(Ordering1.records());
// Ordering 2
// ----------------------------------------
// LF_POINTER 0x1000 {int**}
// Referent = int*
// LF_POINTER 0x1001 {int***}
// Referent = 0x1000
// LF_POINTER 0x1002 {char**}
// Referent = char*
// LF_POINTER 0x1003 {double**}
// Referent = double*
// LF_ARGLIST 0x1004 {(char**, int***)}
// Arg[0] = 0x1002
// Arg[1] = 0x1001
// LF_PROCEDURE 0x1005 {int** func(char**, int***)}
// ArgList = 0x1004
// ReturnType = 0x1000
IntPP[1] = createPointerRecord(Ordering2, IntP);
IntPPP[1] = createPointerRecord(Ordering2, IntPP[1]);
CharPP[1] = createPointerRecord(Ordering2, CharP);
DoublePP[1] = createPointerRecord(Ordering2, DoubleP);
Args[1] = createArgListRecord(Ordering2, CharPP[1], IntPPP[1]);
Proc[1] = createProcedureRecord(Ordering2, 2, IntPP[1], Args[1]);
auto Hashes2 = GloballyHashedType::hashTypes(Ordering2.records());
ASSERT_EQ(0x1000U, IntPP[1].getIndex());
ASSERT_EQ(0x1001U, IntPPP[1].getIndex());
ASSERT_EQ(0x1002U, CharPP[1].getIndex());
ASSERT_EQ(0x1003U, DoublePP[1].getIndex());
ASSERT_EQ(0x1004U, Args[1].getIndex());
ASSERT_EQ(0x1005U, Proc[1].getIndex());
// Sanity check to make sure all same-ordering hashes are different
// from each other.
verifyHashUniqueness(Hashes1);
verifyHashUniqueness(Hashes2);
EXPECT_EQ(hash_of(Hashes1, IntPP[0]), hash_of(Hashes2, IntPP[1]));
EXPECT_EQ(hash_of(Hashes1, IntPPP[0]), hash_of(Hashes2, IntPPP[1]));
EXPECT_EQ(hash_of(Hashes1, CharPP[0]), hash_of(Hashes2, CharPP[1]));
EXPECT_EQ(hash_of(Hashes1, Args[0]), hash_of(Hashes2, Args[1]));
EXPECT_EQ(hash_of(Hashes1, Proc[0]), hash_of(Hashes2, Proc[1]));
}