Lex: Add some unit tests for corrupt header maps

Split the implementation of `HeaderMap` into `HeaderMapImpl` so that we
can write unit tests that don't depend on the `FileManager`, and then
write a few tests that cover the types of corrupt header maps already
detected.

This also moves type and constant definitions from HeaderMap.cpp to
HeaderMapTypes.h so that the test can access them.

llvm-svn: 261446
This commit is contained in:
Duncan P. N. Exon Smith 2016-02-20 20:39:51 +00:00
parent 223838aea2
commit 9ab99eecc3
5 changed files with 199 additions and 68 deletions

View File

@ -26,39 +26,27 @@ class FileManager;
struct HMapBucket;
struct HMapHeader;
/// This class represents an Apple concept known as a 'header map'. To the
/// \#include file resolution process, it basically acts like a directory of
/// symlinks to files. Its advantages are that it is dense and more efficient
/// to create and process than a directory of symlinks.
class HeaderMap {
/// Implementation for \a HeaderMap that doesn't depend on \a FileManager.
class HeaderMapImpl {
std::unique_ptr<const llvm::MemoryBuffer> FileBuffer;
bool NeedsBSwap;
HeaderMap(std::unique_ptr<const llvm::MemoryBuffer> File, bool BSwap)
: FileBuffer(std::move(File)), NeedsBSwap(BSwap) {}
public:
/// HeaderMap::Create - This attempts to load the specified file as a header
/// map. If it doesn't look like a HeaderMap, it gives up and returns null.
static const HeaderMap *Create(const FileEntry *FE, FileManager &FM);
HeaderMapImpl(std::unique_ptr<const llvm::MemoryBuffer> File, bool NeedsBSwap)
: FileBuffer(std::move(File)), NeedsBSwap(NeedsBSwap) {}
/// LookupFile - Check to see if the specified relative filename is located in
/// this HeaderMap. If so, open it and return its FileEntry.
/// If RawPath is not NULL and the file is found, RawPath will be set to the
/// raw path at which the file was found in the file system. For example,
/// for a search path ".." and a filename "../file.h" this would be
/// "../../file.h".
const FileEntry *LookupFile(StringRef Filename, FileManager &FM) const;
// Check for a valid header and extract the byte swap.
static bool checkHeader(const llvm::MemoryBuffer &File, bool &NeedsByteSwap);
/// If the specified relative filename is located in this HeaderMap return
/// the filename it is mapped to, otherwise return an empty StringRef.
StringRef lookupFilename(StringRef Filename,
SmallVectorImpl<char> &DestPath) const;
/// getFileName - Return the filename of the headermap.
/// Return the filename of the headermap.
const char *getFileName() const;
/// dump - Print the contents of this headermap to stderr.
/// Print the contents of this headermap to stderr.
void dump() const;
private:
@ -68,6 +56,31 @@ private:
const char *getString(unsigned StrTabIdx) const;
};
/// This class represents an Apple concept known as a 'header map'. To the
/// \#include file resolution process, it basically acts like a directory of
/// symlinks to files. Its advantages are that it is dense and more efficient
/// to create and process than a directory of symlinks.
///
/// This is the \a FileManager-aware front end of \a HeaderMapImpl. The
/// implementation is inherited privately and only the members named in the
/// using-declarations below are re-exported.
class HeaderMap : private HeaderMapImpl {
// Private: instances are only created through Create(). Both arguments are
// forwarded unchanged to the HeaderMapImpl constructor.
HeaderMap(std::unique_ptr<const llvm::MemoryBuffer> File, bool BSwap)
: HeaderMapImpl(std::move(File), BSwap) {}
public:
/// This attempts to load the specified file as a header map. If it doesn't
/// look like a HeaderMap, it gives up and returns null. On success the
/// returned object is allocated with \c new.
static const HeaderMap *Create(const FileEntry *FE, FileManager &FM);
/// Check to see if the specified relative filename is located in this
/// HeaderMap. If so, return the FileEntry that \p FM reports for the
/// destination path the filename maps to; if the filename has no mapping,
/// return null.
const FileEntry *LookupFile(StringRef Filename, FileManager &FM) const;
// Re-export the parts of the implementation that do not need a FileManager.
using HeaderMapImpl::lookupFilename;
using HeaderMapImpl::getFileName;
using HeaderMapImpl::dump;
};
} // end namespace clang.
#endif

View File

@ -0,0 +1,43 @@
//===- HeaderMapTypes.h - Types for the header map format -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LEX_HEADERMAPTYPES_H
#define LLVM_CLANG_LEX_HEADERMAPTYPES_H
#include <cstdint>
namespace clang {
/// Manifest constants of the header map file format.
enum {
  /// Expected value of HMapHeader::Magic: the characters 'h', 'm', 'a', 'p'
  /// packed into one word with 'h' in the most significant byte.
  HMAP_HeaderMagicNumber = ((((('h' << 8) | 'm') << 8) | 'a') << 8) | 'p',

  /// The only value of HMapHeader::Version that is recognized.
  HMAP_HeaderVersion = 1,

  /// Value of HMapBucket::Key that marks a bucket as empty.
  HMAP_EmptyBucketKey = 0
};
/// One slot of the header map hash table, laid out exactly as it is stored in
/// the file. All three fields are offsets into the string pool; a Key equal to
/// HMAP_EmptyBucketKey marks the slot as empty.
struct HMapBucket {
  uint32_t Key;    // Offset (into strings) of key.
  uint32_t Prefix; // Offset (into strings) of value prefix.
  uint32_t Suffix; // Offset (into strings) of value suffix.
};

// Buckets are read straight out of the file buffer, so the in-memory layout
// must be exactly three packed 32-bit words.
static_assert(sizeof(HMapBucket) == 3 * sizeof(uint32_t),
              "HMapBucket must match the on-disk layout with no padding");
/// The fixed-size header found at the very start of a header map file, laid
/// out exactly as it is stored in the file.
struct HMapHeader {
  uint32_t Magic;          // Magic word, also indicates byte order.
  uint16_t Version;        // Version number -- currently 1.
  uint16_t Reserved;       // Reserved for future use - zero for now.
  uint32_t StringsOffset;  // Offset to start of string pool.
  uint32_t NumEntries;     // Number of entries in the string table.
  uint32_t NumBuckets;     // Number of buckets (always a power of 2).
  uint32_t MaxValueLength; // Length of longest result path (excluding nul).
  // An array of 'NumBuckets' HMapBucket objects follows this header.
  // Strings follow the buckets, at StringsOffset.
};

// The start of the file buffer is reinterpreted as an HMapHeader, so the
// in-memory layout must be exactly the packed on-disk layout.
static_assert(sizeof(HMapHeader) ==
                  5 * sizeof(uint32_t) + 2 * sizeof(uint16_t),
              "HMapHeader must match the on-disk layout with no padding");
} // end namespace clang.
#endif

View File

@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/HeaderMap.h"
#include "clang/Lex/HeaderMapTypes.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/FileManager.h"
#include "llvm/ADT/SmallString.h"
@ -22,38 +23,6 @@
#include <memory>
using namespace clang;
//===----------------------------------------------------------------------===//
// Data Structures and Manifest Constants
//===----------------------------------------------------------------------===//
enum {
HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p',
HMAP_HeaderVersion = 1,
HMAP_EmptyBucketKey = 0
};
namespace clang {
struct HMapBucket {
uint32_t Key; // Offset (into strings) of key.
uint32_t Prefix; // Offset (into strings) of value prefix.
uint32_t Suffix; // Offset (into strings) of value suffix.
};
struct HMapHeader {
uint32_t Magic; // Magic word, also indicates byte order.
uint16_t Version; // Version number -- currently 1.
uint16_t Reserved; // Reserved for future use - zero for now.
uint32_t StringsOffset; // Offset to start of string pool.
uint32_t NumEntries; // Number of entries in the string table.
uint32_t NumBuckets; // Number of buckets (always a power of 2).
uint32_t MaxValueLength; // Length of longest result path (excluding nul).
// An array of 'NumBuckets' HMapBucket objects follows this header.
// Strings follow the buckets, at StringsOffset.
};
} // end namespace clang.
/// HashHMapKey - This is the 'well known' hash function required by the file
/// format, used to look up keys in the hash table. The hash table uses simple
/// linear probing based on this function.
@ -82,15 +51,25 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) {
if (FileSize <= sizeof(HMapHeader)) return nullptr;
auto FileBuffer = FM.getBufferForFile(FE);
if (!FileBuffer) return nullptr; // Unreadable file?
const char *FileStart = (*FileBuffer)->getBufferStart();
if (!FileBuffer || !*FileBuffer)
return nullptr;
bool NeedsByteSwap;
if (!checkHeader(**FileBuffer, NeedsByteSwap))
return nullptr;
return new HeaderMap(std::move(*FileBuffer), NeedsByteSwap);
}
bool HeaderMapImpl::checkHeader(const llvm::MemoryBuffer &File,
bool &NeedsByteSwap) {
if (File.getBufferSize() <= sizeof(HMapHeader))
return false;
const char *FileStart = File.getBufferStart();
// We know the file is at least as big as the header, check it now.
const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
// Sniff it to see if it's a headermap by checking the magic number and
// version.
bool NeedsByteSwap;
if (Header->Magic == HMAP_HeaderMagicNumber &&
Header->Version == HMAP_HeaderVersion)
NeedsByteSwap = false;
@ -98,12 +77,13 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) {
Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
NeedsByteSwap = true; // Mixed endianness headermap.
else
return nullptr; // Not a header map.
return false; // Not a header map.
if (Header->Reserved != 0) return nullptr;
if (Header->Reserved != 0)
return false;
// Okay, everything looks good, create the header map.
return new HeaderMap(std::move(*FileBuffer), NeedsByteSwap);
// Okay, everything looks good.
return true;
}
//===----------------------------------------------------------------------===//
@ -112,18 +92,18 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) {
/// getFileName - Return the filename of the headermap.
const char *HeaderMap::getFileName() const {
const char *HeaderMapImpl::getFileName() const {
return FileBuffer->getBufferIdentifier();
}
unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const {
unsigned HeaderMapImpl::getEndianAdjustedWord(unsigned X) const {
if (!NeedsBSwap) return X;
return llvm::ByteSwap_32(X);
}
/// getHeader - Return a reference to the file header, in unbyte-swapped form.
/// This method cannot fail.
const HMapHeader &HeaderMap::getHeader() const {
const HMapHeader &HeaderMapImpl::getHeader() const {
// We know the file is at least as big as the header. Return it.
return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
}
@ -131,7 +111,7 @@ const HMapHeader &HeaderMap::getHeader() const {
/// getBucket - Return the specified hash table bucket from the header map,
/// bswap'ing its fields as appropriate. If the bucket number is not valid,
/// this return a bucket with an empty key (0).
HMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const {
HMapBucket Result;
Result.Key = HMAP_EmptyBucketKey;
@ -155,7 +135,7 @@ HMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
/// getString - Look up the specified string in the string table. If the string
/// index is not valid, it returns an empty string.
const char *HeaderMap::getString(unsigned StrTabIdx) const {
const char *HeaderMapImpl::getString(unsigned StrTabIdx) const {
// Add the start of the string table to the idx.
StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
@ -174,7 +154,7 @@ const char *HeaderMap::getString(unsigned StrTabIdx) const {
//===----------------------------------------------------------------------===//
/// dump - Print the contents of this headermap to stderr.
LLVM_DUMP_METHOD void HeaderMap::dump() const {
LLVM_DUMP_METHOD void HeaderMapImpl::dump() const {
const HMapHeader &Hdr = getHeader();
unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
@ -199,15 +179,15 @@ const FileEntry *HeaderMap::LookupFile(
StringRef Filename, FileManager &FM) const {
SmallString<1024> Path;
StringRef Dest = lookupFilename(Filename, Path);
StringRef Dest = HeaderMapImpl::lookupFilename(Filename, Path);
if (Dest.empty())
return nullptr;
return FM.getFile(Dest);
}
StringRef HeaderMap::lookupFilename(StringRef Filename,
SmallVectorImpl<char> &DestPath) const {
StringRef HeaderMapImpl::lookupFilename(StringRef Filename,
SmallVectorImpl<char> &DestPath) const {
const HMapHeader &Hdr = getHeader();
unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);

View File

@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
)
add_clang_unittest(LexTests
HeaderMapTest.cpp
LexerTest.cpp
PPCallbacksTest.cpp
PPConditionalDirectiveRecordTest.cpp

View File

@ -0,0 +1,94 @@
//===- unittests/Lex/HeaderMapTest.cpp - HeaderMap tests ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===--------------------------------------------------------------===//
#include "clang/Lex/HeaderMap.h"
#include "clang/Lex/HeaderMapTypes.h"
#include "llvm/Support/SwapByteOrder.h"
#include "gtest/gtest.h"
using namespace clang;
using namespace llvm;
namespace {
// In-memory image of a header map file for testing: the header, followed by
// NumBuckets hash buckets, followed by NumBytes of storage at the offset the
// header advertises as the start of the strings.
template <unsigned NumBuckets, unsigned NumBytes> struct MapFile {
  HMapHeader Header;
  HMapBucket Buckets[NumBuckets];
  unsigned char Bytes[NumBytes];

  // Zero the whole image, then fill in the header of a well-formed map in
  // native byte order whose strings start right after the buckets.
  void init() {
    memset(this, 0, sizeof(*this));
    Header.StringsOffset = sizeof(Header) + sizeof(Buckets);
    Header.NumBuckets = NumBuckets;
    Header.Version = HMAP_HeaderVersion;
    Header.Magic = HMAP_HeaderMagicNumber;
  }

  // Byte-swap the four header fields that init() assigns. Every other field
  // is left exactly as it is.
  void swapBytes() {
    Header.StringsOffset = llvm::sys::getSwappedBytes(Header.StringsOffset);
    Header.NumBuckets = llvm::sys::getSwappedBytes(Header.NumBuckets);
    Header.Version = llvm::sys::getSwappedBytes(Header.Version);
    Header.Magic = llvm::sys::getSwappedBytes(Header.Magic);
  }

  // Expose the bytes of this object as a MemoryBuffer named "header". The
  // buffer is built over this object's own storage, so it must not be used
  // after the MapFile is gone.
  std::unique_ptr<const MemoryBuffer> getBuffer() const {
    const char *Image = reinterpret_cast<const char *>(this);
    return MemoryBuffer::getMemBuffer(StringRef(Image, sizeof(*this)), "header",
                                      /* RequiresNullTerminator */ false);
  }
};
// checkHeader must reject buffers that are too small to be a header map: a
// zero-length buffer, and a buffer that holds a well-formed header but nothing
// after it.
TEST(HeaderMapTest, checkHeaderEmpty) {
  bool NeedsSwap;
  ASSERT_FALSE(HeaderMapImpl::checkHeader(
      *MemoryBuffer::getMemBufferCopy("", "empty"), NeedsSwap));

  // Boundary case: exactly sizeof(HMapHeader) bytes with a valid magic,
  // version and reserved field. Only the size check can reject this one.
  HMapHeader Header = {};
  Header.Magic = HMAP_HeaderMagicNumber;
  Header.Version = HMAP_HeaderVersion;
  ASSERT_FALSE(HeaderMapImpl::checkHeader(
      *MemoryBuffer::getMemBuffer(
          StringRef(reinterpret_cast<const char *>(&Header), sizeof(Header)),
          "header-only",
          /* RequiresNullTerminator */ false),
      NeedsSwap));
}
// A header whose magic word is not the header map magic must be rejected.
TEST(HeaderMapTest, checkHeaderMagic) {
  bool Swapped;
  MapFile<1, 1> Map;
  Map.init();
  Map.Header.Magic = 0; // Overwrite the magic word that init() stored.

  const auto Buffer = Map.getBuffer();
  ASSERT_FALSE(HeaderMapImpl::checkHeader(*Buffer, Swapped));
}
// A header whose reserved field is non-zero must be rejected.
TEST(HeaderMapTest, checkHeaderReserved) {
  bool Swapped;
  MapFile<1, 1> Map;
  Map.init();
  Map.Header.Reserved = 1; // init() left this field zero.

  const auto Buffer = Map.getBuffer();
  ASSERT_FALSE(HeaderMapImpl::checkHeader(*Buffer, Swapped));
}
// A header with a version other than HMAP_HeaderVersion must be rejected.
TEST(HeaderMapTest, checkHeaderVersion) {
  bool Swapped;
  MapFile<1, 1> Map;
  Map.init();
  // init() stored HMAP_HeaderVersion; move one past it.
  Map.Header.Version = HMAP_HeaderVersion + 1;

  const auto Buffer = Map.getBuffer();
  ASSERT_FALSE(HeaderMapImpl::checkHeader(*Buffer, Swapped));
}
// A well-formed map is accepted in either byte order, and checkHeader reports
// whether its fields need to be byte-swapped.
TEST(HeaderMapTest, checkHeaderValidButEmpty) {
  MapFile<1, 1> Map;
  Map.init();
  bool Swapped;

  // As written by init(): accepted, no swap required.
  const auto Native = Map.getBuffer();
  ASSERT_TRUE(HeaderMapImpl::checkHeader(*Native, Swapped));
  ASSERT_FALSE(Swapped);

  // Same map with its header byte-swapped: accepted, swap required.
  Map.swapBytes();
  const auto Foreign = Map.getBuffer();
  ASSERT_TRUE(HeaderMapImpl::checkHeader(*Foreign, Swapped));
  ASSERT_TRUE(Swapped);
}
} // end namespace