diff --git a/flang/include/flang/Parser/source.h b/flang/include/flang/Parser/source.h index cc7dc9219a88..4c5be0f62c0c 100644 --- a/flang/include/flang/Parser/source.h +++ b/flang/include/flang/Parser/source.h @@ -19,6 +19,7 @@ #include #include #include +#include "llvm/Support/MemoryBuffer.h" namespace llvm { class raw_ostream; @@ -42,8 +43,8 @@ public: explicit SourceFile(Encoding e) : encoding_{e} {} ~SourceFile(); std::string path() const { return path_; } - const char *content() const { return content_; } - std::size_t bytes() const { return bytes_; } + llvm::ArrayRef content() const { return buf_->getBuffer().slice(bom_end_, buf_end_ - bom_end_); } + std::size_t bytes() const { return content().size(); } std::size_t lines() const { return lineStart_.size(); } Encoding encoding() const { return encoding_; } @@ -56,20 +57,16 @@ public: } private: - bool ReadFile(std::string errorPath, llvm::raw_ostream &error); + void ReadFile(); void IdentifyPayload(); void RecordLineStarts(); std::string path_; - int fileDescriptor_{-1}; - bool isMemoryMapped_{false}; - const char *address_{nullptr}; // raw content - std::size_t size_{0}; - const char *content_{nullptr}; // usable content - std::size_t bytes_{0}; + std::unique_ptr buf_; std::vector lineStart_; - std::string normalized_; - Encoding encoding_{Encoding::UTF_8}; + std::size_t bom_end_ {0}; + std::size_t buf_end_; + Encoding encoding_; }; } #endif // FORTRAN_PARSER_SOURCE_H_ diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 67d5cdfbfb1d..1648a0297603 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -63,7 +63,7 @@ void Prescanner::Prescan(ProvenanceRange range) { std::size_t offset{0}; const SourceFile *source{allSources.GetSourceFile(startProvenance_, &offset)}; CHECK(source); - start_ = source->content() + offset; + start_ = source->content().data() + offset; limit_ = start_ + range.size(); nextLine_ = start_; const bool beganInFixedForm{inFixedForm_}; diff --git a/flang/lib/Parser/provenance.cpp b/flang/lib/Parser/provenance.cpp index fb43496b53ef..1a2a77cd1daf 100644 --- a/flang/lib/Parser/provenance.cpp +++ b/flang/lib/Parser/provenance.cpp @@ -228,7 +228,7 @@ void AllSources::EmitMessage(llvm::raw_ostream &o, o << ':' << pos.line << ':' << pos.column; o << ": " << message << '\n'; if (echoSourceLine) { - const char *text{inc.source.content() + + const char *text{inc.source.content().data() + inc.source.GetLineStartOffset(pos.line)}; o << " "; for (const char *p{text}; *p != '\n'; ++p) { diff --git a/flang/lib/Parser/source.cpp b/flang/lib/Parser/source.cpp index 4f8a08aa5271..6b1a9df3b731 100644 --- a/flang/lib/Parser/source.cpp +++ b/flang/lib/Parser/source.cpp @@ -10,64 +10,42 @@ #include "flang/Common/idioms.h" #include "flang/Parser/char-buffer.h" #include "llvm/Support/Errno.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include -#include -#include -#include #include -#include -#include -#include -#include #include -// TODO: Port to Windows &c. - namespace Fortran::parser { -static constexpr bool useMMap{true}; -static constexpr int minMapFileBytes{1}; // i.e., no minimum requirement -static constexpr int maxMapOpenFileDescriptors{100}; -static int openFileDescriptors{0}; - SourceFile::~SourceFile() { Close(); } -static std::vector FindLineStarts( - const char *source, std::size_t bytes) { +static std::vector FindLineStarts(llvm::StringRef source) { std::vector result; - if (bytes > 0) { - CHECK(source[bytes - 1] == '\n' && "missing ultimate newline"); + if (source.size() > 0) { + CHECK(source.back() == '\n' && "missing ultimate newline"); std::size_t at{0}; do { result.push_back(at); - const void *vp{static_cast(&source[at])}; - const void *vnl{std::memchr(vp, '\n', bytes - at)}; - const char *nl{static_cast(vnl)}; - at = nl + 1 - source; - } while (at < bytes); + at = source.find('\n', at) + 1; + } while (at < source.size()); result.shrink_to_fit(); } return result; } void SourceFile::RecordLineStarts() { - lineStart_ = FindLineStarts(content_, bytes_); + lineStart_ = FindLineStarts({content().data(), bytes()}); } // Check for a Unicode byte order mark (BOM). // Module files all have one; so can source files. void SourceFile::IdentifyPayload() { - content_ = address_; - bytes_ = size_; - if (content_) { - static constexpr int BOMBytes{3}; - static const char UTF8_BOM[]{"\xef\xbb\xbf"}; - if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) { - content_ += BOMBytes; - bytes_ -= BOMBytes; - encoding_ = Encoding::UTF_8; - } + llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; + constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"}; + if (content.startswith(UTF8_BOM)) { + bom_end_ = UTF8_BOM.size(); + encoding_ = Encoding::UTF_8; } } @@ -83,17 +61,20 @@ std::string LocateSourceFile( } for (const std::string &dir : searchPath) { std::string path{dir + '/' + name}; - struct stat statbuf; - if (stat(path.c_str(), &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) { + bool isDir{false}; + auto er = llvm::sys::fs::is_directory(path, isDir); + if (!er && !isDir) { return path; } } return name; } -static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) { +std::size_t RemoveCarriageReturns(llvm::MutableArrayRef buf) { std::size_t wrote{0}; - char *p{buffer}; + char *buffer{buf.data()}; + char *p{buf.data()}; + std::size_t bytes = buf.size(); while (bytes > 0) { void *vp{static_cast(p)}; void *crvp{std::memchr(vp, '\r', bytes)}; @@ -115,154 +96,57 @@ static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) { bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { Close(); path_ = path; - std::string errorPath{"'"s + path + "'"}; - errno = 0; - fileDescriptor_ = open(path.c_str(), O_RDONLY); - if (fileDescriptor_ < 0) { - error << "Could not open " << errorPath << ": " - << llvm::sys::StrError(errno); + std::string errorPath{"'"s + path_ + "'"}; + auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; + if (!bufOr) { + auto err = bufOr.getError(); + error << "Could not open " << errorPath << ": " << err.message(); return false; } - ++openFileDescriptors; - return ReadFile(errorPath, error); + buf_ = std::move(bufOr.get()); + ReadFile(); + return true; } bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { Close(); path_ = "standard input"; - fileDescriptor_ = 0; - return ReadFile(path_, error); -} -bool SourceFile::ReadFile(std::string errorPath, llvm::raw_ostream &error) { - struct stat statbuf; - if (fstat(fileDescriptor_, &statbuf) != 0) { - error << "fstat failed on " << errorPath << ": " - << llvm::sys::StrError(errno); - Close(); + auto buf_or = llvm::MemoryBuffer::getSTDIN(); + if (!buf_or) { + auto err = buf_or.getError(); + error << err.message(); return false; } - if (S_ISDIR(statbuf.st_mode)) { - error << errorPath << " is a directory"; - Close(); - return false; - } - - // Try to map a large source file into the process' address space. - // Don't bother with small ones. This also helps keep the number - // of open file descriptors from getting out of hand. - if (useMMap && S_ISREG(statbuf.st_mode)) { - size_ = static_cast(statbuf.st_size); - if (size_ >= minMapFileBytes && - openFileDescriptors <= maxMapOpenFileDescriptors) { - void *vp = mmap(0, size_, PROT_READ, MAP_SHARED, fileDescriptor_, 0); - if (vp != MAP_FAILED) { - address_ = static_cast(const_cast(vp)); - IdentifyPayload(); - if (bytes_ > 0 && content_[bytes_ - 1] == '\n' && - std::memchr(static_cast(content_), '\r', bytes_) == - nullptr) { - isMemoryMapped_ = true; - RecordLineStarts(); - return true; - } - // The file needs to have its line endings normalized to simple - // newlines. Remap it for a private rewrite in place. - vp = mmap( - vp, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0); - if (vp != MAP_FAILED) { - address_ = static_cast(const_cast(vp)); - IdentifyPayload(); - auto mutableContent{const_cast(content_)}; - bytes_ = RemoveCarriageReturns(mutableContent, bytes_); - if (bytes_ > 0) { - if (mutableContent[bytes_ - 1] == '\n' || - (bytes_ & 0xfff) != 0 /* don't cross into next page */) { - if (mutableContent[bytes_ - 1] != '\n') { - // Append a final newline. - mutableContent[bytes_++] = '\n'; - } - bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0}; - CHECK(isNowReadOnly); - content_ = mutableContent; - isMemoryMapped_ = true; - RecordLineStarts(); - return true; - } - } - } - munmap(vp, size_); - address_ = content_ = nullptr; - size_ = bytes_ = 0; - } - } - } - - // Read it into an expandable buffer, then marshal its content into a single - // contiguous block. - CharBuffer buffer; - while (true) { - std::size_t count; - char *to{buffer.FreeSpace(count)}; - ssize_t got{read(fileDescriptor_, to, count)}; - if (got < 0) { - error << "could not read " << errorPath << ": " - << llvm::sys::StrError(errno); - Close(); - return false; - } - if (got == 0) { - break; - } - buffer.Claim(got); - } - if (fileDescriptor_ > 0) { - close(fileDescriptor_); - --openFileDescriptors; - } - fileDescriptor_ = -1; - normalized_ = buffer.MarshalNormalized(); - address_ = normalized_.c_str(); - size_ = normalized_.size(); - IdentifyPayload(); - RecordLineStarts(); + auto inbuf = std::move(buf_or.get()); + buf_ = + llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); + llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); + ReadFile(); return true; } +void SourceFile::ReadFile() { + if (buf_->getBuffer().size() == 0) { + Close(); + buf_ = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(1); + buf_->getBuffer()[0] = '\n'; + } + buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); + IdentifyPayload(); + RecordLineStarts(); +} + void SourceFile::Close() { - if (useMMap && isMemoryMapped_) { - munmap(reinterpret_cast(const_cast(address_)), size_); - isMemoryMapped_ = false; - } else if (!normalized_.empty()) { - normalized_.clear(); - } else if (address_) { - delete[] address_; - } - address_ = content_ = nullptr; - size_ = bytes_ = 0; - if (fileDescriptor_ > 0) { - close(fileDescriptor_); - --openFileDescriptors; - } - fileDescriptor_ = -1; path_.clear(); + buf_.reset(); } SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { - CHECK(at < bytes_); - if (lineStart_.empty()) { - return {*this, 1, static_cast(at + 1)}; - } - std::size_t low{0}, count{lineStart_.size()}; - while (count > 1) { - std::size_t mid{low + (count >> 1)}; - if (lineStart_[mid] > at) { - count = mid - low; - } else { - count -= mid - low; - low = mid; - } - } + CHECK(at < bytes()); + + auto it = llvm::upper_bound(lineStart_, at); + auto low = std::distance(lineStart_.begin(), it - 1); return {*this, static_cast(low + 1), static_cast(at - lineStart_[low] + 1)}; } diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index bbf62f9c372f..5c457321cc68 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -728,8 +728,8 @@ static std::string CheckSum(const std::string_view &contents) { return result; } -static bool VerifyHeader(const char *content, std::size_t len) { - std::string_view sv{content, len}; +static bool VerifyHeader(llvm::ArrayRef content) { + std::string_view sv{content.data(), content.size()}; if (sv.substr(0, ModHeader::magicLen) != ModHeader::magic) { return false; } @@ -767,7 +767,7 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) { return nullptr; } CHECK(sourceFile); - if (!VerifyHeader(sourceFile->content(), sourceFile->bytes())) { + if (!VerifyHeader(sourceFile->content())) { Say(name, ancestorName, "File has invalid checksum: %s"_en_US, sourceFile->path()); return nullptr; diff --git a/flang/test/Semantics/empty.f90 b/flang/test/Semantics/empty.f90 new file mode 100644 index 000000000000..e47c2e65342c --- /dev/null +++ b/flang/test/Semantics/empty.f90 @@ -0,0 +1,4 @@ +! RUN: %f18 -fparse-only %s +! RUN: rm -rf %t && mkdir %t +! RUN: touch %t/empty.f90 +! RUN: %f18 -fparse-only %t/empty.f90