llvm-project/clang/lib/Basic/FileManager.cpp

596 lines
20 KiB
C++

//===--- FileManager.cpp - File System Probing and Caching ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the FileManager interface.
//
//===----------------------------------------------------------------------===//
//
// TODO: This should index all interesting directories with dirent calls.
// getdirentries ?
// opendir/readdir_r/closedir ?
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <map>
#include <set>
#include <string>
// FIXME: This is terrible, we need this for ::close.
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#include <sys/uio.h>
#else
#include <io.h>
#ifndef S_ISFIFO
#define S_ISFIFO(x) (0)
#endif
#endif
#if defined(LLVM_ON_UNIX)
#include <limits.h>
#endif
using namespace clang;
// FIXME: Enhance libsystem to support inode and other fields.
#include <sys/stat.h>
/// NON_EXISTENT_DIR - A special value distinct from null that is used to
/// represent a dir name that doesn't exist on the disk.
#define NON_EXISTENT_DIR reinterpret_cast<DirectoryEntry*>((intptr_t)-1)
/// NON_EXISTENT_FILE - A special value distinct from null that is used to
/// represent a filename that doesn't exist on the disk.
#define NON_EXISTENT_FILE reinterpret_cast<FileEntry*>((intptr_t)-1)
FileEntry::~FileEntry() {
// If this FileEntry owns an open file descriptor that never got used, close
// it.
if (FD != -1) ::close(FD);
}
class FileManager::UniqueDirContainer {
/// UniqueDirs - Cache from ID's to existing directories/files.
std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueDirs;
public:
/// getDirectory - Return an existing DirectoryEntry with the given
/// ID's if there is already one; otherwise create and return a
/// default-constructed DirectoryEntry.
DirectoryEntry &getDirectory(const llvm::sys::fs::UniqueID &UniqueID) {
return UniqueDirs[UniqueID];
}
size_t size() const { return UniqueDirs.size(); }
};
class FileManager::UniqueFileContainer {
/// UniqueFiles - Cache from ID's to existing directories/files.
std::set<FileEntry> UniqueFiles;
public:
/// getFile - Return an existing FileEntry with the given ID's if
/// there is already one; otherwise create and return a
/// default-constructed FileEntry.
FileEntry &getFile(llvm::sys::fs::UniqueID UniqueID, bool IsNamedPipe,
bool InPCH) {
return const_cast<FileEntry &>(
*UniqueFiles.insert(FileEntry(UniqueID, IsNamedPipe, InPCH)).first);
}
size_t size() const { return UniqueFiles.size(); }
void erase(const FileEntry *Entry) { UniqueFiles.erase(*Entry); }
};
//===----------------------------------------------------------------------===//
// Common logic.
//===----------------------------------------------------------------------===//
FileManager::FileManager(const FileSystemOptions &FSO)
: FileSystemOpts(FSO),
UniqueRealDirs(*new UniqueDirContainer()),
UniqueRealFiles(*new UniqueFileContainer()),
SeenDirEntries(64), SeenFileEntries(64), NextFileUID(0) {
NumDirLookups = NumFileLookups = 0;
NumDirCacheMisses = NumFileCacheMisses = 0;
}
FileManager::~FileManager() {
delete &UniqueRealDirs;
delete &UniqueRealFiles;
for (unsigned i = 0, e = VirtualFileEntries.size(); i != e; ++i)
delete VirtualFileEntries[i];
for (unsigned i = 0, e = VirtualDirectoryEntries.size(); i != e; ++i)
delete VirtualDirectoryEntries[i];
}
void FileManager::addStatCache(FileSystemStatCache *statCache,
bool AtBeginning) {
assert(statCache && "No stat cache provided?");
if (AtBeginning || StatCache.get() == 0) {
statCache->setNextStatCache(StatCache.take());
StatCache.reset(statCache);
return;
}
FileSystemStatCache *LastCache = StatCache.get();
while (LastCache->getNextStatCache())
LastCache = LastCache->getNextStatCache();
LastCache->setNextStatCache(statCache);
}
void FileManager::removeStatCache(FileSystemStatCache *statCache) {
if (!statCache)
return;
if (StatCache.get() == statCache) {
// This is the first stat cache.
StatCache.reset(StatCache->takeNextStatCache());
return;
}
// Find the stat cache in the list.
FileSystemStatCache *PrevCache = StatCache.get();
while (PrevCache && PrevCache->getNextStatCache() != statCache)
PrevCache = PrevCache->getNextStatCache();
assert(PrevCache && "Stat cache not found for removal");
PrevCache->setNextStatCache(statCache->getNextStatCache());
}
void FileManager::clearStatCaches() {
StatCache.reset(0);
}
/// \brief Retrieve the directory that the given file name resides in.
/// Filename can point to either a real file or a virtual file.
static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr,
StringRef Filename,
bool CacheFailure) {
if (Filename.empty())
return NULL;
if (llvm::sys::path::is_separator(Filename[Filename.size() - 1]))
return NULL; // If Filename is a directory.
StringRef DirName = llvm::sys::path::parent_path(Filename);
// Use the current directory if file has no path component.
if (DirName.empty())
DirName = ".";
return FileMgr.getDirectory(DirName, CacheFailure);
}
/// Add all ancestors of the given path (pointing to either a file or
/// a directory) as virtual directories.
void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
StringRef DirName = llvm::sys::path::parent_path(Path);
if (DirName.empty())
return;
llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
SeenDirEntries.GetOrCreateValue(DirName);
// When caching a virtual directory, we always cache its ancestors
// at the same time. Therefore, if DirName is already in the cache,
// we don't need to recurse as its ancestors must also already be in
// the cache.
if (NamedDirEnt.getValue())
return;
// Add the virtual directory to the cache.
DirectoryEntry *UDE = new DirectoryEntry;
UDE->Name = NamedDirEnt.getKeyData();
NamedDirEnt.setValue(UDE);
VirtualDirectoryEntries.push_back(UDE);
// Recursively add the other ancestors.
addAncestorsAsVirtualDirs(DirName);
}
const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
bool CacheFailure) {
// stat doesn't like trailing separators except for root directory.
// At least, on Win32 MSVCRT, stat() cannot strip trailing '/'.
// (though it can strip '\\')
if (DirName.size() > 1 &&
DirName != llvm::sys::path::root_path(DirName) &&
llvm::sys::path::is_separator(DirName.back()))
DirName = DirName.substr(0, DirName.size()-1);
#ifdef LLVM_ON_WIN32
// Fixing a problem with "clang C:test.c" on Windows.
// Stat("C:") does not recognize "C:" as a valid directory
std::string DirNameStr;
if (DirName.size() > 1 && DirName.back() == ':' &&
DirName.equals_lower(llvm::sys::path::root_name(DirName))) {
DirNameStr = DirName.str() + '.';
DirName = DirNameStr;
}
#endif
++NumDirLookups;
llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
SeenDirEntries.GetOrCreateValue(DirName);
// See if there was already an entry in the map. Note that the map
// contains both virtual and real directories.
if (NamedDirEnt.getValue())
return NamedDirEnt.getValue() == NON_EXISTENT_DIR
? 0 : NamedDirEnt.getValue();
++NumDirCacheMisses;
// By default, initialize it to invalid.
NamedDirEnt.setValue(NON_EXISTENT_DIR);
// Get the null-terminated directory name as stored as the key of the
// SeenDirEntries map.
const char *InterndDirName = NamedDirEnt.getKeyData();
// Check to see if the directory exists.
FileData Data;
if (getStatValue(InterndDirName, Data, false, 0 /*directory lookup*/)) {
// There's no real directory at the given path.
if (!CacheFailure)
SeenDirEntries.erase(DirName);
return 0;
}
// It exists. See if we have already opened a directory with the
// same inode (this occurs on Unix-like systems when one dir is
// symlinked to another, for example) or the same path (on
// Windows).
DirectoryEntry &UDE =
UniqueRealDirs.getDirectory(Data.UniqueID);
NamedDirEnt.setValue(&UDE);
if (!UDE.getName()) {
// We don't have this directory yet, add it. We use the string
// key from the SeenDirEntries map as the string.
UDE.Name = InterndDirName;
}
return &UDE;
}
const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
bool CacheFailure) {
++NumFileLookups;
// See if there is already an entry in the map.
llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
SeenFileEntries.GetOrCreateValue(Filename);
// See if there is already an entry in the map.
if (NamedFileEnt.getValue())
return NamedFileEnt.getValue() == NON_EXISTENT_FILE
? 0 : NamedFileEnt.getValue();
++NumFileCacheMisses;
// By default, initialize it to invalid.
NamedFileEnt.setValue(NON_EXISTENT_FILE);
// Get the null-terminated file name as stored as the key of the
// SeenFileEntries map.
const char *InterndFileName = NamedFileEnt.getKeyData();
// Look up the directory for the file. When looking up something like
// sys/foo.h we'll discover all of the search directories that have a 'sys'
// subdirectory. This will let us avoid having to waste time on known-to-fail
// searches when we go to find sys/bar.h, because all the search directories
// without a 'sys' subdir will get a cached failure result.
const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
CacheFailure);
if (DirInfo == 0) { // Directory doesn't exist, file can't exist.
if (!CacheFailure)
SeenFileEntries.erase(Filename);
return 0;
}
// FIXME: Use the directory info to prune this, before doing the stat syscall.
// FIXME: This will reduce the # syscalls.
// Nope, there isn't. Check to see if the file exists.
int FileDescriptor = -1;
FileData Data;
if (getStatValue(InterndFileName, Data, true,
openFile ? &FileDescriptor : 0)) {
// There's no real file at the given path.
if (!CacheFailure)
SeenFileEntries.erase(Filename);
return 0;
}
if (FileDescriptor != -1 && !openFile) {
close(FileDescriptor);
FileDescriptor = -1;
}
// It exists. See if we have already opened a file with the same inode.
// This occurs when one dir is symlinked to another, for example.
FileEntry &UFE =
UniqueRealFiles.getFile(Data.UniqueID, Data.IsNamedPipe, Data.InPCH);
NamedFileEnt.setValue(&UFE);
if (UFE.getName()) { // Already have an entry with this inode, return it.
// If the stat process opened the file, close it to avoid a FD leak.
if (FileDescriptor != -1)
close(FileDescriptor);
return &UFE;
}
// Otherwise, we don't have this directory yet, add it.
// FIXME: Change the name to be a char* that points back to the
// 'SeenFileEntries' key.
UFE.Name = InterndFileName;
UFE.Size = Data.Size;
UFE.ModTime = Data.ModTime;
UFE.Dir = DirInfo;
UFE.UID = NextFileUID++;
UFE.FD = FileDescriptor;
return &UFE;
}
const FileEntry *
FileManager::getVirtualFile(StringRef Filename, off_t Size,
time_t ModificationTime) {
++NumFileLookups;
// See if there is already an entry in the map.
llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
SeenFileEntries.GetOrCreateValue(Filename);
// See if there is already an entry in the map.
if (NamedFileEnt.getValue() && NamedFileEnt.getValue() != NON_EXISTENT_FILE)
return NamedFileEnt.getValue();
++NumFileCacheMisses;
// By default, initialize it to invalid.
NamedFileEnt.setValue(NON_EXISTENT_FILE);
addAncestorsAsVirtualDirs(Filename);
FileEntry *UFE = 0;
// Now that all ancestors of Filename are in the cache, the
// following call is guaranteed to find the DirectoryEntry from the
// cache.
const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
/*CacheFailure=*/true);
assert(DirInfo &&
"The directory of a virtual file should already be in the cache.");
// Check to see if the file exists. If so, drop the virtual file
FileData Data;
const char *InterndFileName = NamedFileEnt.getKeyData();
if (getStatValue(InterndFileName, Data, true, 0) == 0) {
Data.Size = Size;
Data.ModTime = ModificationTime;
UFE = &UniqueRealFiles.getFile(Data.UniqueID, Data.IsNamedPipe, Data.InPCH);
NamedFileEnt.setValue(UFE);
// If we had already opened this file, close it now so we don't
// leak the descriptor. We're not going to use the file
// descriptor anyway, since this is a virtual file.
if (UFE->FD != -1) {
close(UFE->FD);
UFE->FD = -1;
}
// If we already have an entry with this inode, return it.
if (UFE->getName())
return UFE;
}
if (!UFE) {
UFE = new FileEntry();
VirtualFileEntries.push_back(UFE);
NamedFileEnt.setValue(UFE);
}
UFE->Name = InterndFileName;
UFE->Size = Size;
UFE->ModTime = ModificationTime;
UFE->Dir = DirInfo;
UFE->UID = NextFileUID++;
UFE->FD = -1;
return UFE;
}
void FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const {
StringRef pathRef(path.data(), path.size());
if (FileSystemOpts.WorkingDir.empty()
|| llvm::sys::path::is_absolute(pathRef))
return;
SmallString<128> NewPath(FileSystemOpts.WorkingDir);
llvm::sys::path::append(NewPath, pathRef);
path = NewPath;
}
llvm::MemoryBuffer *FileManager::
getBufferForFile(const FileEntry *Entry, std::string *ErrorStr,
bool isVolatile) {
OwningPtr<llvm::MemoryBuffer> Result;
llvm::error_code ec;
uint64_t FileSize = Entry->getSize();
// If there's a high enough chance that the file have changed since we
// got its size, force a stat before opening it.
if (isVolatile)
FileSize = -1;
const char *Filename = Entry->getName();
// If the file is already open, use the open file descriptor.
if (Entry->FD != -1) {
ec = llvm::MemoryBuffer::getOpenFile(Entry->FD, Filename, Result, FileSize);
if (ErrorStr)
*ErrorStr = ec.message();
close(Entry->FD);
Entry->FD = -1;
return Result.take();
}
// Otherwise, open the file.
if (FileSystemOpts.WorkingDir.empty()) {
ec = llvm::MemoryBuffer::getFile(Filename, Result, FileSize);
if (ec && ErrorStr)
*ErrorStr = ec.message();
return Result.take();
}
SmallString<128> FilePath(Entry->getName());
FixupRelativePath(FilePath);
ec = llvm::MemoryBuffer::getFile(FilePath.str(), Result, FileSize);
if (ec && ErrorStr)
*ErrorStr = ec.message();
return Result.take();
}
llvm::MemoryBuffer *FileManager::
getBufferForFile(StringRef Filename, std::string *ErrorStr) {
OwningPtr<llvm::MemoryBuffer> Result;
llvm::error_code ec;
if (FileSystemOpts.WorkingDir.empty()) {
ec = llvm::MemoryBuffer::getFile(Filename, Result);
if (ec && ErrorStr)
*ErrorStr = ec.message();
return Result.take();
}
SmallString<128> FilePath(Filename);
FixupRelativePath(FilePath);
ec = llvm::MemoryBuffer::getFile(FilePath.c_str(), Result);
if (ec && ErrorStr)
*ErrorStr = ec.message();
return Result.take();
}
/// getStatValue - Get the 'stat' information for the specified path,
/// using the cache to accelerate it if possible. This returns true
/// if the path points to a virtual file or does not exist, or returns
/// false if it's an existent real file. If FileDescriptor is NULL,
/// do directory look-up instead of file look-up.
bool FileManager::getStatValue(const char *Path, FileData &Data, bool isFile,
int *FileDescriptor) {
// FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
// absolute!
if (FileSystemOpts.WorkingDir.empty())
return FileSystemStatCache::get(Path, Data, isFile, FileDescriptor,
StatCache.get());
SmallString<128> FilePath(Path);
FixupRelativePath(FilePath);
return FileSystemStatCache::get(FilePath.c_str(), Data, isFile,
FileDescriptor, StatCache.get());
}
bool FileManager::getNoncachedStatValue(StringRef Path,
llvm::sys::fs::file_status &Result) {
SmallString<128> FilePath(Path);
FixupRelativePath(FilePath);
return llvm::sys::fs::status(FilePath.c_str(), Result);
}
void FileManager::invalidateCache(const FileEntry *Entry) {
assert(Entry && "Cannot invalidate a NULL FileEntry");
SeenFileEntries.erase(Entry->getName());
// FileEntry invalidation should not block future optimizations in the file
// caches. Possible alternatives are cache truncation (invalidate last N) or
// invalidation of the whole cache.
UniqueRealFiles.erase(Entry);
}
void FileManager::GetUniqueIDMapping(
SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
UIDToFiles.clear();
UIDToFiles.resize(NextFileUID);
// Map file entries
for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end();
FE != FEEnd; ++FE)
if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE)
UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
// Map virtual file entries
for (SmallVectorImpl<FileEntry *>::const_iterator
VFE = VirtualFileEntries.begin(), VFEEnd = VirtualFileEntries.end();
VFE != VFEEnd; ++VFE)
if (*VFE && *VFE != NON_EXISTENT_FILE)
UIDToFiles[(*VFE)->getUID()] = *VFE;
}
void FileManager::modifyFileEntry(FileEntry *File,
off_t Size, time_t ModificationTime) {
File->Size = Size;
File->ModTime = ModificationTime;
}
StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) {
// FIXME: use llvm::sys::fs::canonical() when it gets implemented
#ifdef LLVM_ON_UNIX
llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known
= CanonicalDirNames.find(Dir);
if (Known != CanonicalDirNames.end())
return Known->second;
StringRef CanonicalName(Dir->getName());
char CanonicalNameBuf[PATH_MAX];
if (realpath(Dir->getName(), CanonicalNameBuf)) {
unsigned Len = strlen(CanonicalNameBuf);
char *Mem = static_cast<char *>(CanonicalNameStorage.Allocate(Len, 1));
memcpy(Mem, CanonicalNameBuf, Len);
CanonicalName = StringRef(Mem, Len);
}
CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName));
return CanonicalName;
#else
return StringRef(Dir->getName());
#endif
}
void FileManager::PrintStats() const {
llvm::errs() << "\n*** File Manager Stats:\n";
llvm::errs() << UniqueRealFiles.size() << " real files found, "
<< UniqueRealDirs.size() << " real dirs found.\n";
llvm::errs() << VirtualFileEntries.size() << " virtual files found, "
<< VirtualDirectoryEntries.size() << " virtual dirs found.\n";
llvm::errs() << NumDirLookups << " dir lookups, "
<< NumDirCacheMisses << " dir cache misses.\n";
llvm::errs() << NumFileLookups << " file lookups, "
<< NumFileCacheMisses << " file cache misses.\n";
//llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
}