[flang] Improve module file reading and writing

Fix problems with writing a mod file while another compilation is
reading or writing it. Write to a temp file and then rename it:
- compute the new contents of the .mod file
- if it already exists, check if it is already correct
- if not, write new contents to a temp file
- rename the temp to the final destination

`mkstemps()` seems to be the best way to create the temp file.
It returns a file descriptor, so change the rest of the mod file
writing to use POSIX open/read/write/close. This seems to set
errno more reliably too.

There is some extra work around creating the temp to make it have
the same directory and suffix as the final file (so that if one gets
left behind by a crash, "rm *.mod" still cleans it up).
`mkstemps()` creates the file with 0600 permissions, so try to change them
to what they would have been if we had just written the file directly.

Change module file reading to only read the file once; we used to
read it to verify the checksum and then again to parse it.
Instead, change `Parsing` so that we can get the file contents
after `Prescan()` and use that to verify the checksum. Also, it has
a mechanism for searching directories for files, so make use of that
instead of duplicating that functionality in `ModFileReader`.
This requires some changes to how errors are returned so they can
be reported in the right place.

Original-commit: flang-compiler/f18@d0d54971a5
Reviewed-on: https://github.com/flang-compiler/f18/pull/758
Tree-same-pre-rewrite: false
This commit is contained in:
Tim Keith 2019-09-23 17:10:58 -07:00 committed by GitHub
parent 42c1c21300
commit 73c630a4ea
9 changed files with 177 additions and 144 deletions

View File

@ -236,6 +236,7 @@ public:
return *this;
}
std::forward_list<Message> &messages() { return messages_; }
bool empty() const { return messages_.empty(); }
void clear();

View File

@ -28,34 +28,42 @@ namespace Fortran::parser {
Parsing::Parsing(AllSources &s) : cooked_{s} {}
Parsing::~Parsing() {}
void Parsing::Prescan(const std::string &path, Options options) {
const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
options_ = options;
AllSources &allSources{cooked_.allSources()};
if (options.isModuleFile) {
for (const auto &path : options.searchDirectories) {
allSources.PushSearchPathDirectory(path);
}
}
std::stringstream fileError;
const SourceFile *sourceFile;
AllSources &allSources{cooked_.allSources()};
if (path == "-") {
sourceFile = allSources.ReadStandardInput(&fileError);
} else {
sourceFile = allSources.Open(path, &fileError);
}
if (sourceFile == nullptr) {
if (!fileError.str().empty()) {
ProvenanceRange range{allSources.AddCompilerInsertion(path)};
messages_.Say(range, "%s"_err_en_US, fileError.str());
return;
return sourceFile;
}
if (sourceFile->bytes() == 0) {
ProvenanceRange range{allSources.AddCompilerInsertion(path)};
messages_.Say(range, "file is empty"_err_en_US);
return;
return sourceFile;
}
// N.B. Be sure to not push the search directory paths until the primary
// source file has been opened. If foo.f is missing from the current
// working directory, we don't want to accidentally read another foo.f
// from another directory that's on the search path.
for (const auto &path : options.searchDirectories) {
allSources.PushSearchPathDirectory(path);
if (!options.isModuleFile) {
// For .mod files we always want to look in the search directories.
// For normal source files we don't push them until after the primary
// source file has been opened. If foo.f is missing from the current
// working directory, we don't want to accidentally read another foo.f
// from another directory that's on the search path.
for (const auto &path : options.searchDirectories) {
allSources.PushSearchPathDirectory(path);
}
}
Preprocessor preprocessor{allSources};
@ -81,6 +89,7 @@ void Parsing::Prescan(const std::string &path, Options options) {
if (options.needProvenanceRangeToCharBlockMappings) {
cooked_.CompileProvenanceRangeToOffsetMappings();
}
return sourceFile;
}
void Parsing::DumpCookedChars(std::ostream &out) const {

View File

@ -55,7 +55,7 @@ public:
Messages &messages() { return messages_; }
std::optional<Program> &parseTree() { return parseTree_; }
void Prescan(const std::string &path, Options);
const SourceFile *Prescan(const std::string &path, Options);
void DumpCookedChars(std::ostream &) const;
void DumpProvenance(std::ostream &) const;
void DumpParsingLog(std::ostream &) const;

View File

@ -174,10 +174,8 @@ std::string AllSources::PopSearchPathDirectory() {
const SourceFile *AllSources::Open(std::string path, std::stringstream *error) {
std::unique_ptr<SourceFile> source{std::make_unique<SourceFile>(encoding_)};
if (source->Open(LocateSourceFile(path, searchPath_), error)) {
return ownedSourceFiles_.emplace_back(std::move(source)).get();
}
return nullptr;
source->Open(LocateSourceFile(path, searchPath_), error);
return ownedSourceFiles_.emplace_back(std::move(source)).get();
}
const SourceFile *AllSources::ReadStandardInput(std::stringstream *error) {

View File

@ -124,7 +124,7 @@ bool SourceFile::Open(std::string path, std::stringstream *error) {
errno = 0;
fileDescriptor_ = open(path.c_str(), O_RDONLY);
if (fileDescriptor_ < 0) {
*error << "could not open " << errorPath << ": " << std::strerror(errno);
*error << "Could not open " << errorPath << ": " << std::strerror(errno);
return false;
}
++openFileDescriptors;

View File

@ -13,32 +13,43 @@
// limitations under the License.
#include "mod-file.h"
#include "resolve-names.h"
#include "scope.h"
#include "semantics.h"
#include "symbol.h"
#include "../evaluate/tools.h"
#include "../parser/message.h"
#include "../parser/parsing.h"
#include <algorithm>
#include <cerrno>
#include <fstream>
#include <ostream>
#include <set>
#include <string_view>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
namespace Fortran::semantics {
using namespace parser::literals;
// The initial characters of a file that identify it as a .mod file.
// The first line of a file that identifies it as a .mod file.
// The first three bytes are a Unicode byte order mark that ensures
// that the module file is decoded as UTF-8 even if source files
// are using another encoding.
static constexpr auto magic{"\xef\xbb\xbf!mod$ v1 sum:"};
// Layout constants for the first line of a .mod file.
struct ModHeader {
  // UTF-8 byte order mark written before the magic string.
  static constexpr const char *bom{"\xef\xbb\xbf"};
  // Text that follows the BOM and precedes the checksum.
  static constexpr const char *magic{"!mod$ v1 sum:"};
  // Length of magic. std::char_traits<char>::length is constexpr in C++17,
  // whereas std::strlen is only constexpr as a compiler extension.
  static constexpr int magicLen{
      static_cast<int>(std::char_traits<char>::length(magic))};
  // Number of hex digits in the checksum.
  static constexpr int sumLen{16};
  // Character that ends the header line.
  static constexpr char terminator{'\n'};
  // Length of magic + checksum + one more character (the BOM is not counted).
  static constexpr int len{magicLen + 1 + sumLen};
};
static std::optional<SourceName> GetSubmoduleParent(const parser::Program &);
static std::string ModFilePath(const std::string &dir, const SourceName &,
const std::string &ancestor, const std::string &suffix);
static std::vector<const Symbol *> CollectSymbols(const Scope &);
static void PutEntity(std::ostream &, const Symbol &);
static void PutObjectEntity(std::ostream &, const Symbol &);
@ -55,11 +66,11 @@ static std::ostream &PutAttrs(std::ostream &, Attrs,
static std::ostream &PutAttr(std::ostream &, Attr);
static std::ostream &PutType(std::ostream &, const DeclTypeSpec &);
static std::ostream &PutLower(std::ostream &, const std::string &);
static bool WriteFile(const std::string &, std::string &&);
static int WriteFile(const std::string &, const std::string &);
static bool FileContentsMatch(
std::fstream &, const std::string &, const std::string &);
static std::string GetHeader(const std::string &);
const std::string &, const std::string &, const std::string &);
static std::size_t GetFileSize(const std::string &);
static std::string CheckSum(const std::string_view &);
// Collect symbols needed for a subprogram interface
class SubprogramSymbolCollector {
@ -116,16 +127,23 @@ void ModFileWriter::WriteOne(const Scope &scope) {
}
}
// Build the file name (without directory) of a module file: the module name
// followed by suffix. For a submodule (ancestorName is not empty) the name is
// additionally prefixed with the ancestor module name and a '-'.
static std::string ModFileName(const SourceName &name,
    const std::string &ancestorName, const std::string &suffix) {
  std::string fileName{name.ToString() + suffix};
  if (!ancestorName.empty()) {
    return ancestorName + '-' + fileName;
  }
  return fileName;
}
// Write the module file for symbol, which must be a module or submodule.
void ModFileWriter::Write(const Symbol &symbol) {
auto *ancestor{symbol.get<ModuleDetails>().ancestor()};
auto ancestorName{ancestor ? ancestor->GetName().value().ToString() : ""s};
auto path{ModFilePath(context_.moduleDirectory(), symbol.name(), ancestorName,
context_.moduleFileSuffix())};
auto path{context_.moduleDirectory() + '/' +
ModFileName(symbol.name(), ancestorName, context_.moduleFileSuffix())};
PutSymbols(*symbol.scope());
if (!WriteFile(path, GetAsString(symbol))) {
if (int error{WriteFile(path, GetAsString(symbol))}) {
context_.Say(symbol.name(), "Error writing %s: %s"_err_en_US, path,
std::strerror(errno));
std::strerror(error));
}
}
@ -598,84 +616,118 @@ std::ostream &PutLower(std::ostream &os, const std::string &str) {
return os;
}
// Write the module file at path, prepending header. Return false on error.
static bool WriteFile(const std::string &path, std::string &&contents) {
std::fstream stream;
auto header{GetHeader(contents)};
auto size{GetFileSize(path)};
if (size == header.size() + 1 + contents.size()) {
// file exists and has the right size, check the contents
stream.open(path, std::ios::in | std::ios::out);
if (FileContentsMatch(stream, header, contents)) {
return true;
}
stream.seekp(0);
} else {
stream.open(path, std::ios::out);
struct Temp {
Temp() = delete;
~Temp() {
close(fd);
unlink(path.c_str());
}
stream << header << '\n' << contents;
stream.close();
return !stream.fail();
int fd;
std::string path;
};
// Create a temp file in the same directory and with the same suffix as path.
// Return an open file descriptor and its path. If the file cannot be created
// the returned fd is negative and errno is left as set by mkstemps(), so the
// caller can report it.
static Temp MkTemp(const std::string &path) {
  auto length{path.length()};
  // The suffix starts at the last '.', unless a '/' comes after it.
  auto dot{path.find_last_of("./")};
  std::string suffix{dot < length && path[dot] == '.' ? path.substr(dot) : ""};
  CHECK(length > suffix.length() &&
      path.substr(length - suffix.length()) == suffix);
  // mkstemps() replaces the "XXXXXX" in place and keeps the suffix intact.
  auto tempPath{path.substr(0, length - suffix.length()) + "XXXXXX" + suffix};
  int fd{mkstemps(&tempPath[0], suffix.length())};
  if (fd >= 0) {
    // The temp is created with mode 0600; give it the mode a plainly created
    // file would have had. umask() can only be read by setting it, so set it
    // and immediately restore it. Only done on success so that a failing
    // chmod cannot overwrite the errno from mkstemps(). Best effort: a
    // failure to change the mode is not treated as an error.
    auto mask{umask(0777)};
    umask(mask);
    fchmod(fd, 0666 & ~mask);
  }
  return Temp{fd, tempPath};
}
// Write all of data to fd, continuing after partial writes and retrying when
// interrupted by a signal. Return 0 on success, otherwise a non-zero errno
// value.
static int WriteAll(int fd, const std::string &data) {
  const char *next{data.data()};
  std::size_t remaining{data.size()};
  while (remaining > 0) {
    ssize_t written{write(fd, next, remaining)};
    if (written < 0) {
      if (errno == EINTR) {
        continue;
      }
      return errno != 0 ? errno : EIO;
    }
    if (written == 0) {
      return EIO;  // no progress and no errno: avoid looping forever
    }
    next += written;
    remaining -= static_cast<std::size_t>(written);
  }
  return 0;
}

// Write the module file at path, prepending header. If an error occurs,
// return errno (always non-zero), otherwise 0.
// Nothing is written if the file already has exactly the intended contents.
// Otherwise the contents go to a temp file which is then renamed to path, so
// a concurrent reader never sees a partially written module file.
static int WriteFile(const std::string &path, const std::string &contents) {
  auto header{std::string{ModHeader::bom} + ModHeader::magic +
      CheckSum(contents) + ModHeader::terminator};
  if (FileContentsMatch(path, header, contents)) {
    return 0;
  }
  Temp temp{MkTemp(path)};
  if (temp.fd < 0) {
    return errno != 0 ? errno : EIO;
  }
  if (int error{WriteAll(temp.fd, header)}) {
    return error;
  }
  if (int error{WriteAll(temp.fd, contents)}) {
    return error;
  }
  if (std::rename(temp.path.c_str(), path.c_str()) == -1) {
    return errno != 0 ? errno : EIO;
  }
  return 0;
}
// Return true if the stream matches what we would write for the mod file.
static bool FileContentsMatch(std::fstream &stream, const std::string &header,
const std::string &contents) {
char c;
for (std::size_t i{0}; i < header.size(); ++i) {
if (!stream.get(c) || c != header[i]) {
return false;
}
}
if (!stream.get(c) || c != '\n') {
static bool FileContentsMatch(const std::string &path,
const std::string &header, const std::string &contents) {
std::size_t hsize{header.size()};
std::size_t csize{contents.size()};
if (GetFileSize(path) != hsize + csize) {
return false;
}
for (std::size_t i{0}; i < contents.size(); ++i) {
if (!stream.get(c) || c != contents[i]) {
int fd{open(path.c_str(), O_RDONLY)};
if (fd < 0) {
return false;
}
constexpr std::size_t bufSize{4096};
std::string buffer(bufSize, '\0');
if (read(fd, &buffer[0], hsize) != static_cast<ssize_t>(hsize) ||
std::memcmp(&buffer[0], &header[0], hsize) != 0) {
close(fd);
return false; // header doesn't match
}
for (auto remaining{csize};;) {
auto bytes{std::min(bufSize, remaining)};
auto got{read(fd, &buffer[0], bytes)};
if (got != static_cast<ssize_t>(bytes) ||
std::memcmp(&buffer[0], &contents[csize - remaining], bytes) != 0) {
close(fd);
return false;
}
if (bytes == 0 && remaining == 0) {
close(fd);
return true;
}
remaining -= bytes;
}
return !stream.get(c);
}
// Compute a simple hash of the contents of a module file and
// return it as a string of hex digits.
// This uses the Fowler-Noll-Vo hash function.
template<typename Iter> static std::string CheckSum(Iter begin, Iter end) {
static std::string CheckSum(const std::string_view &contents) {
std::uint64_t hash{0xcbf29ce484222325ull};
for (auto it{begin}; it != end; ++it) {
char c{*it};
for (char c : contents) {
hash ^= c & 0xff;
hash *= 0x100000001b3;
}
static const char *digits = "0123456789abcdef";
std::string result(16, '0');
for (size_t i{16}; hash != 0; hash >>= 4) {
std::string result(ModHeader::sumLen, '0');
for (size_t i{ModHeader::sumLen}; hash != 0; hash >>= 4) {
result[--i] = digits[hash & 0xf];
}
return result;
}
static bool VerifyHeader(const std::string &path) {
std::fstream stream{path};
std::string header;
std::getline(stream, header);
auto magicLen{strlen(magic)};
if (header.compare(0, magicLen, magic) != 0) {
static bool VerifyHeader(const char *content, std::size_t len) {
std::string_view sv{content, len};
if (sv.substr(0, ModHeader::magicLen) != ModHeader::magic) {
return false;
}
std::string expectSum{header.substr(magicLen, 16)};
std::string actualSum{CheckSum(std::istreambuf_iterator<char>(stream),
std::istreambuf_iterator<char>())};
std::string_view expectSum{sv.substr(ModHeader::magicLen, ModHeader::sumLen)};
std::string actualSum{CheckSum(sv.substr(ModHeader::len))};
return expectSum == actualSum;
}
static std::string GetHeader(const std::string &all) {
std::stringstream ss;
ss << magic << CheckSum(all.begin(), all.end());
return ss.str();
}
static std::size_t GetFileSize(const std::string &path) {
struct stat statbuf;
if (stat(path.c_str(), &statbuf) == 0) {
@ -698,28 +750,34 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) {
return it->second->scope();
}
}
auto path{FindModFile(name, ancestorName)};
if (!path.has_value()) {
return nullptr;
}
// TODO: We are reading the file once to verify the checksum and then again
// to parse. Do it only reading the file once.
if (!VerifyHeader(*path)) {
context_.Say(name,
"Module file for '%s' has invalid checksum: %s"_err_en_US, name, *path);
return nullptr;
}
parser::Parsing parsing{context_.allSources()};
parser::Options options;
options.isModuleFile = true;
options.features.Enable(parser::LanguageFeature::BackslashEscapes);
parsing.Prescan(*path, options);
options.searchDirectories = context_.searchDirectories();
auto path{ModFileName(name, ancestorName, context_.moduleFileSuffix())};
const auto *sourceFile{parsing.Prescan(path, options)};
if (sourceFile == nullptr) {
return nullptr;
} else if (parsing.messages().AnyFatalError()) {
for (auto &msg : parsing.messages().messages()) {
std::string str{msg.ToString()};
Say(name, ancestorName, parser::MessageFixedText{str.c_str(), str.size()},
sourceFile->path());
}
return nullptr;
} else if (!VerifyHeader(sourceFile->content(), sourceFile->bytes())) {
Say(name, ancestorName, "File has invalid checksum: %s"_en_US,
sourceFile->path());
return nullptr;
}
parsing.Parse(nullptr);
auto &parseTree{parsing.parseTree()};
if (!parsing.messages().empty() || !parsing.consumedWholeFile() ||
!parseTree.has_value()) {
context_.Say(
name, "Module file for '%s' is corrupt: %s"_err_en_US, name, *path);
Say(name, ancestorName, "Module file is corrupt: %s"_err_en_US,
sourceFile->path());
return nullptr;
}
Scope *parentScope; // the scope this module/submodule goes into
@ -730,7 +788,6 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) {
} else {
parentScope = ancestor;
}
// TODO: Check that default kinds of intrinsic types match?
ResolveNames(context_, *parseTree);
const auto &it{parentScope->find(name)};
if (it == parentScope->end()) {
@ -742,32 +799,16 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) {
return modSymbol.scope();
}
std::optional<std::string> ModFileReader::FindModFile(
const SourceName &name, const std::string &ancestor) {
parser::Messages attachments;
for (auto &dir : context_.searchDirectories()) {
std::string path{
ModFilePath(dir, name, ancestor, context_.moduleFileSuffix())};
std::ifstream ifstream{path};
if (!ifstream.good()) {
attachments.Say(name, "%s: %s"_en_US, path, std::strerror(errno));
} else {
std::string line;
std::getline(ifstream, line);
if (line.compare(0, strlen(magic), magic) == 0) {
return path;
}
attachments.Say(name, "%s: Not a valid module file"_en_US, path);
}
}
auto error{parser::Message{name,
ancestor.empty()
? "Cannot find module file for '%s'"_err_en_US
: "Cannot find module file for submodule '%s' of module '%s'"_err_en_US,
name, ancestor}};
attachments.AttachTo(error);
context_.Say(std::move(error));
return std::nullopt;
// Report an error about reading a module file through context_.
// The primary message, located at name, is "Error reading module file for
// module '%s'" when ancestor is empty, and the submodule variant naming both
// name and ancestor otherwise. msg is then attached at the same location,
// with arg as its argument.
// Returns the reference produced by the Say(...).Attach(...) chain.
// NOTE(review): name and ancestor are both passed even when the one-argument
// format is selected; presumably the extra argument is ignored -- confirm.
parser::Message &ModFileReader::Say(const SourceName &name,
const std::string &ancestor, parser::MessageFixedText &&msg,
const std::string &arg) {
return context_
.Say(name,
ancestor.empty()
? "Error reading module file for module '%s'"_err_en_US
: "Error reading module file for submodule '%s' of module '%s'"_err_en_US,
name, ancestor)
.Attach(name, std::move(msg), arg);
}
// program was read from a .mod file for a submodule; return the name of the
@ -787,20 +828,6 @@ static std::optional<SourceName> GetSubmoduleParent(
}
}
// Construct the path to a module file. ancestorName not empty means submodule.
static std::string ModFilePath(const std::string &dir, const SourceName &name,
const std::string &ancestorName, const std::string &suffix) {
std::stringstream path;
if (dir != "."s) {
path << dir << '/';
}
if (!ancestorName.empty()) {
path << ancestorName << '-';
}
path << name << suffix;
return path.str();
}
void SubprogramSymbolCollector::Collect() {
const auto &details{symbol_.get<SubprogramDetails>()};
isInterface_ = details.isInterface();

View File

@ -16,15 +16,13 @@
#define FORTRAN_SEMANTICS_MOD_FILE_H_
#include "attr.h"
#include "resolve-names.h"
#include "../parser/message.h"
#include <set>
#include <sstream>
#include <string>
#include <vector>
namespace Fortran::parser {
class CharBlock;
class Message;
class MessageFixedText;
}
namespace Fortran::semantics {
@ -71,9 +69,9 @@ public:
private:
SemanticsContext &context_;
std::optional<std::string> FindModFile(
const SourceName &, const std::string &);
parser::Message &Say(const SourceName &, const std::string &,
parser::MessageFixedText &&, const std::string &);
};
}
}
#endif

View File

@ -1,4 +1,4 @@
! Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
! Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
!
! Licensed under the Apache License, Version 2.0 (the "License");
! you may not use this file except in compliance with the License.
@ -19,7 +19,7 @@ subroutine sub
end
use m1
!ERROR: Cannot find module file for 'm2'
!ERROR: Error reading module file for module 'm2'
use m2
!ERROR: 'sub' is not a module
use sub

View File

@ -1,4 +1,4 @@
! Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
! Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
!
! Licensed under the Apache License, Version 2.0 (the "License");
! you may not use this file except in compliance with the License.
@ -29,10 +29,10 @@ end
submodule(m1) s1
end
!ERROR: Cannot find module file for submodule 's1' of module 'm2'
!ERROR: Error reading module file for submodule 's1' of module 'm2'
submodule(m2:s1) s2
end
!ERROR: Cannot find module file for 'm3'
!ERROR: Error reading module file for module 'm3'
submodule(m3:s1) s3
end