forked from OSchip/llvm-project
[flang] Replace manual mmap with llvm::MemoryBuffer
The previous code handled the case where too many file descriptors might be open at once; this is not necessary with MemoryBuffer, as the file descriptor is closed after the mapping occurs. MemoryBuffer also internally handles the case where a file is small and an mmap would therefore be bad for performance; such files are simply copied into memory after being opened. Many places elsewhere in the code assume that the buffer is not empty, and the old file-opening code handled this by replacing an empty file with a buffer containing a single newline. That behavior is kept in the new MemoryBuffer-based code. Original-commit: flang-compiler/f18@d34df84351 Reviewed-on: https://github.com/flang-compiler/f18/pull/1032
This commit is contained in:
parent
901198441f
commit
13ea73e42d
|
@ -19,6 +19,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
|
||||
namespace llvm {
|
||||
class raw_ostream;
|
||||
|
@ -42,8 +43,8 @@ public:
|
|||
explicit SourceFile(Encoding e) : encoding_{e} {}
|
||||
~SourceFile();
|
||||
std::string path() const { return path_; }
|
||||
const char *content() const { return content_; }
|
||||
std::size_t bytes() const { return bytes_; }
|
||||
llvm::ArrayRef<char> content() const { return buf_->getBuffer().slice(bom_end_, buf_end_ - bom_end_); }
|
||||
std::size_t bytes() const { return content().size(); }
|
||||
std::size_t lines() const { return lineStart_.size(); }
|
||||
Encoding encoding() const { return encoding_; }
|
||||
|
||||
|
@ -56,20 +57,16 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
bool ReadFile(std::string errorPath, llvm::raw_ostream &error);
|
||||
void ReadFile();
|
||||
void IdentifyPayload();
|
||||
void RecordLineStarts();
|
||||
|
||||
std::string path_;
|
||||
int fileDescriptor_{-1};
|
||||
bool isMemoryMapped_{false};
|
||||
const char *address_{nullptr}; // raw content
|
||||
std::size_t size_{0};
|
||||
const char *content_{nullptr}; // usable content
|
||||
std::size_t bytes_{0};
|
||||
std::unique_ptr<llvm::WritableMemoryBuffer> buf_;
|
||||
std::vector<std::size_t> lineStart_;
|
||||
std::string normalized_;
|
||||
Encoding encoding_{Encoding::UTF_8};
|
||||
std::size_t bom_end_ {0};
|
||||
std::size_t buf_end_;
|
||||
Encoding encoding_;
|
||||
};
|
||||
}
|
||||
#endif // FORTRAN_PARSER_SOURCE_H_
|
||||
|
|
|
@ -63,7 +63,7 @@ void Prescanner::Prescan(ProvenanceRange range) {
|
|||
std::size_t offset{0};
|
||||
const SourceFile *source{allSources.GetSourceFile(startProvenance_, &offset)};
|
||||
CHECK(source);
|
||||
start_ = source->content() + offset;
|
||||
start_ = source->content().data() + offset;
|
||||
limit_ = start_ + range.size();
|
||||
nextLine_ = start_;
|
||||
const bool beganInFixedForm{inFixedForm_};
|
||||
|
|
|
@ -228,7 +228,7 @@ void AllSources::EmitMessage(llvm::raw_ostream &o,
|
|||
o << ':' << pos.line << ':' << pos.column;
|
||||
o << ": " << message << '\n';
|
||||
if (echoSourceLine) {
|
||||
const char *text{inc.source.content() +
|
||||
const char *text{inc.source.content().data() +
|
||||
inc.source.GetLineStartOffset(pos.line)};
|
||||
o << " ";
|
||||
for (const char *p{text}; *p != '\n'; ++p) {
|
||||
|
|
|
@ -10,64 +10,42 @@
|
|||
#include "flang/Common/idioms.h"
|
||||
#include "flang/Parser/char-buffer.h"
|
||||
#include "llvm/Support/Errno.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <fcntl.h>
|
||||
#include <memory>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
|
||||
// TODO: Port to Windows &c.
|
||||
|
||||
namespace Fortran::parser {
|
||||
|
||||
static constexpr bool useMMap{true};
|
||||
static constexpr int minMapFileBytes{1}; // i.e., no minimum requirement
|
||||
static constexpr int maxMapOpenFileDescriptors{100};
|
||||
static int openFileDescriptors{0};
|
||||
|
||||
SourceFile::~SourceFile() { Close(); }
|
||||
|
||||
static std::vector<std::size_t> FindLineStarts(
|
||||
const char *source, std::size_t bytes) {
|
||||
static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) {
|
||||
std::vector<std::size_t> result;
|
||||
if (bytes > 0) {
|
||||
CHECK(source[bytes - 1] == '\n' && "missing ultimate newline");
|
||||
if (source.size() > 0) {
|
||||
CHECK(source.back() == '\n' && "missing ultimate newline");
|
||||
std::size_t at{0};
|
||||
do {
|
||||
result.push_back(at);
|
||||
const void *vp{static_cast<const void *>(&source[at])};
|
||||
const void *vnl{std::memchr(vp, '\n', bytes - at)};
|
||||
const char *nl{static_cast<const char *>(vnl)};
|
||||
at = nl + 1 - source;
|
||||
} while (at < bytes);
|
||||
at = source.find('\n', at) + 1;
|
||||
} while (at < source.size());
|
||||
result.shrink_to_fit();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void SourceFile::RecordLineStarts() {
|
||||
lineStart_ = FindLineStarts(content_, bytes_);
|
||||
lineStart_ = FindLineStarts({content().data(), bytes()});
|
||||
}
|
||||
|
||||
// Check for a Unicode byte order mark (BOM).
|
||||
// Module files all have one; so can source files.
|
||||
void SourceFile::IdentifyPayload() {
|
||||
content_ = address_;
|
||||
bytes_ = size_;
|
||||
if (content_) {
|
||||
static constexpr int BOMBytes{3};
|
||||
static const char UTF8_BOM[]{"\xef\xbb\xbf"};
|
||||
if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) {
|
||||
content_ += BOMBytes;
|
||||
bytes_ -= BOMBytes;
|
||||
encoding_ = Encoding::UTF_8;
|
||||
}
|
||||
llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
|
||||
constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
|
||||
if (content.startswith(UTF8_BOM)) {
|
||||
bom_end_ = UTF8_BOM.size();
|
||||
encoding_ = Encoding::UTF_8;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -83,17 +61,20 @@ std::string LocateSourceFile(
|
|||
}
|
||||
for (const std::string &dir : searchPath) {
|
||||
std::string path{dir + '/' + name};
|
||||
struct stat statbuf;
|
||||
if (stat(path.c_str(), &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) {
|
||||
bool isDir{false};
|
||||
auto er = llvm::sys::fs::is_directory(path, isDir);
|
||||
if (!er && !isDir) {
|
||||
return path;
|
||||
}
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) {
|
||||
std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
|
||||
std::size_t wrote{0};
|
||||
char *p{buffer};
|
||||
char *buffer{buf.data()};
|
||||
char *p{buf.data()};
|
||||
std::size_t bytes = buf.size();
|
||||
while (bytes > 0) {
|
||||
void *vp{static_cast<void *>(p)};
|
||||
void *crvp{std::memchr(vp, '\r', bytes)};
|
||||
|
@ -115,154 +96,57 @@ static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) {
|
|||
bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
|
||||
Close();
|
||||
path_ = path;
|
||||
std::string errorPath{"'"s + path + "'"};
|
||||
errno = 0;
|
||||
fileDescriptor_ = open(path.c_str(), O_RDONLY);
|
||||
if (fileDescriptor_ < 0) {
|
||||
error << "Could not open " << errorPath << ": "
|
||||
<< llvm::sys::StrError(errno);
|
||||
std::string errorPath{"'"s + path_ + "'"};
|
||||
auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
|
||||
if (!bufOr) {
|
||||
auto err = bufOr.getError();
|
||||
error << "Could not open " << errorPath << ": " << err.message();
|
||||
return false;
|
||||
}
|
||||
++openFileDescriptors;
|
||||
return ReadFile(errorPath, error);
|
||||
buf_ = std::move(bufOr.get());
|
||||
ReadFile();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
|
||||
Close();
|
||||
path_ = "standard input";
|
||||
fileDescriptor_ = 0;
|
||||
return ReadFile(path_, error);
|
||||
}
|
||||
|
||||
bool SourceFile::ReadFile(std::string errorPath, llvm::raw_ostream &error) {
|
||||
struct stat statbuf;
|
||||
if (fstat(fileDescriptor_, &statbuf) != 0) {
|
||||
error << "fstat failed on " << errorPath << ": "
|
||||
<< llvm::sys::StrError(errno);
|
||||
Close();
|
||||
auto buf_or = llvm::MemoryBuffer::getSTDIN();
|
||||
if (!buf_or) {
|
||||
auto err = buf_or.getError();
|
||||
error << err.message();
|
||||
return false;
|
||||
}
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
error << errorPath << " is a directory";
|
||||
Close();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to map a large source file into the process' address space.
|
||||
// Don't bother with small ones. This also helps keep the number
|
||||
// of open file descriptors from getting out of hand.
|
||||
if (useMMap && S_ISREG(statbuf.st_mode)) {
|
||||
size_ = static_cast<std::size_t>(statbuf.st_size);
|
||||
if (size_ >= minMapFileBytes &&
|
||||
openFileDescriptors <= maxMapOpenFileDescriptors) {
|
||||
void *vp = mmap(0, size_, PROT_READ, MAP_SHARED, fileDescriptor_, 0);
|
||||
if (vp != MAP_FAILED) {
|
||||
address_ = static_cast<const char *>(const_cast<const void *>(vp));
|
||||
IdentifyPayload();
|
||||
if (bytes_ > 0 && content_[bytes_ - 1] == '\n' &&
|
||||
std::memchr(static_cast<const void *>(content_), '\r', bytes_) ==
|
||||
nullptr) {
|
||||
isMemoryMapped_ = true;
|
||||
RecordLineStarts();
|
||||
return true;
|
||||
}
|
||||
// The file needs to have its line endings normalized to simple
|
||||
// newlines. Remap it for a private rewrite in place.
|
||||
vp = mmap(
|
||||
vp, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0);
|
||||
if (vp != MAP_FAILED) {
|
||||
address_ = static_cast<const char *>(const_cast<const void *>(vp));
|
||||
IdentifyPayload();
|
||||
auto mutableContent{const_cast<char *>(content_)};
|
||||
bytes_ = RemoveCarriageReturns(mutableContent, bytes_);
|
||||
if (bytes_ > 0) {
|
||||
if (mutableContent[bytes_ - 1] == '\n' ||
|
||||
(bytes_ & 0xfff) != 0 /* don't cross into next page */) {
|
||||
if (mutableContent[bytes_ - 1] != '\n') {
|
||||
// Append a final newline.
|
||||
mutableContent[bytes_++] = '\n';
|
||||
}
|
||||
bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0};
|
||||
CHECK(isNowReadOnly);
|
||||
content_ = mutableContent;
|
||||
isMemoryMapped_ = true;
|
||||
RecordLineStarts();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
munmap(vp, size_);
|
||||
address_ = content_ = nullptr;
|
||||
size_ = bytes_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read it into an expandable buffer, then marshal its content into a single
|
||||
// contiguous block.
|
||||
CharBuffer buffer;
|
||||
while (true) {
|
||||
std::size_t count;
|
||||
char *to{buffer.FreeSpace(count)};
|
||||
ssize_t got{read(fileDescriptor_, to, count)};
|
||||
if (got < 0) {
|
||||
error << "could not read " << errorPath << ": "
|
||||
<< llvm::sys::StrError(errno);
|
||||
Close();
|
||||
return false;
|
||||
}
|
||||
if (got == 0) {
|
||||
break;
|
||||
}
|
||||
buffer.Claim(got);
|
||||
}
|
||||
if (fileDescriptor_ > 0) {
|
||||
close(fileDescriptor_);
|
||||
--openFileDescriptors;
|
||||
}
|
||||
fileDescriptor_ = -1;
|
||||
normalized_ = buffer.MarshalNormalized();
|
||||
address_ = normalized_.c_str();
|
||||
size_ = normalized_.size();
|
||||
IdentifyPayload();
|
||||
RecordLineStarts();
|
||||
auto inbuf = std::move(buf_or.get());
|
||||
buf_ =
|
||||
llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
|
||||
llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
|
||||
ReadFile();
|
||||
return true;
|
||||
}
|
||||
|
||||
void SourceFile::ReadFile() {
|
||||
if (buf_->getBuffer().size() == 0) {
|
||||
Close();
|
||||
buf_ = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(1);
|
||||
buf_->getBuffer()[0] = '\n';
|
||||
}
|
||||
buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
|
||||
IdentifyPayload();
|
||||
RecordLineStarts();
|
||||
}
|
||||
|
||||
void SourceFile::Close() {
|
||||
if (useMMap && isMemoryMapped_) {
|
||||
munmap(reinterpret_cast<void *>(const_cast<char *>(address_)), size_);
|
||||
isMemoryMapped_ = false;
|
||||
} else if (!normalized_.empty()) {
|
||||
normalized_.clear();
|
||||
} else if (address_) {
|
||||
delete[] address_;
|
||||
}
|
||||
address_ = content_ = nullptr;
|
||||
size_ = bytes_ = 0;
|
||||
if (fileDescriptor_ > 0) {
|
||||
close(fileDescriptor_);
|
||||
--openFileDescriptors;
|
||||
}
|
||||
fileDescriptor_ = -1;
|
||||
path_.clear();
|
||||
buf_.reset();
|
||||
}
|
||||
|
||||
SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
|
||||
CHECK(at < bytes_);
|
||||
if (lineStart_.empty()) {
|
||||
return {*this, 1, static_cast<int>(at + 1)};
|
||||
}
|
||||
std::size_t low{0}, count{lineStart_.size()};
|
||||
while (count > 1) {
|
||||
std::size_t mid{low + (count >> 1)};
|
||||
if (lineStart_[mid] > at) {
|
||||
count = mid - low;
|
||||
} else {
|
||||
count -= mid - low;
|
||||
low = mid;
|
||||
}
|
||||
}
|
||||
CHECK(at < bytes());
|
||||
|
||||
auto it = llvm::upper_bound(lineStart_, at);
|
||||
auto low = std::distance(lineStart_.begin(), it - 1);
|
||||
return {*this, static_cast<int>(low + 1),
|
||||
static_cast<int>(at - lineStart_[low] + 1)};
|
||||
}
|
||||
|
|
|
@ -728,8 +728,8 @@ static std::string CheckSum(const std::string_view &contents) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static bool VerifyHeader(const char *content, std::size_t len) {
|
||||
std::string_view sv{content, len};
|
||||
static bool VerifyHeader(llvm::ArrayRef<char> content) {
|
||||
std::string_view sv{content.data(), content.size()};
|
||||
if (sv.substr(0, ModHeader::magicLen) != ModHeader::magic) {
|
||||
return false;
|
||||
}
|
||||
|
@ -767,7 +767,7 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) {
|
|||
return nullptr;
|
||||
}
|
||||
CHECK(sourceFile);
|
||||
if (!VerifyHeader(sourceFile->content(), sourceFile->bytes())) {
|
||||
if (!VerifyHeader(sourceFile->content())) {
|
||||
Say(name, ancestorName, "File has invalid checksum: %s"_en_US,
|
||||
sourceFile->path());
|
||||
return nullptr;
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
! RUN: %f18 -fparse-only %s
|
||||
! RUN: rm -rf %t && mkdir %t
|
||||
! RUN: touch %t/empty.f90
|
||||
! RUN: %f18 -fparse-only %t/empty.f90
|
Loading…
Reference in New Issue