2020-02-25 23:11:52 +08:00
|
|
|
//===-- lib/Parser/source.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
2018-05-02 03:50:34 +08:00
|
|
|
|
2020-02-25 23:11:52 +08:00
|
|
|
#include "flang/Parser/source.h"
|
|
|
|
#include "flang/Common/idioms.h"
|
|
|
|
#include "flang/Parser/char-buffer.h"
|
2020-02-28 23:11:03 +08:00
|
|
|
#include "llvm/Support/Errno.h"
|
2020-02-27 21:42:56 +08:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
2020-02-28 23:11:03 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2018-01-31 03:55:03 +08:00
|
|
|
#include <algorithm>
|
2018-02-06 06:29:26 +08:00
|
|
|
#include <memory>
|
2018-02-14 07:24:43 +08:00
|
|
|
#include <vector>
|
2018-01-31 03:55:03 +08:00
|
|
|
|
2018-05-03 04:48:12 +08:00
|
|
|
namespace Fortran::parser {
|
2018-01-31 03:55:03 +08:00
|
|
|
|
2018-02-06 06:29:26 +08:00
|
|
|
// Destruction simply delegates to Close().
SourceFile::~SourceFile() {
  Close();
}
|
2018-01-31 03:55:03 +08:00
|
|
|
|
2020-02-27 21:42:56 +08:00
|
|
|
// Returns the byte offset at which each line of `source` begins.
// An empty source yields an empty vector.  A non-empty source must end
// with a newline (enforced by CHECK), so every line start found by the
// scan lies inside the text.
static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) {
  std::vector<std::size_t> starts;
  if (source.empty()) {
    return starts;
  }
  CHECK(source.back() == '\n' && "missing ultimate newline");
  const std::size_t length{source.size()};
  // Each line begins one past the newline that ended the previous one.
  for (std::size_t offset{0}; offset < length;
       offset = source.find('\n', offset) + 1) {
    starts.push_back(offset);
  }
  starts.shrink_to_fit();
  return starts;
}
|
|
|
|
|
2018-10-24 07:48:06 +08:00
|
|
|
void SourceFile::RecordLineStarts() {
|
2020-02-27 21:42:56 +08:00
|
|
|
lineStart_ = FindLineStarts({content().data(), bytes()});
|
2018-10-24 07:48:06 +08:00
|
|
|
}
|
|
|
|
|
2019-06-13 06:26:37 +08:00
|
|
|
// Check for a Unicode byte order mark (BOM).
|
|
|
|
// Module files all have one; so can source files.
|
2018-10-24 07:48:06 +08:00
|
|
|
void SourceFile::IdentifyPayload() {
|
2020-02-27 21:42:56 +08:00
|
|
|
llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
|
|
|
|
constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
|
|
|
|
if (content.startswith(UTF8_BOM)) {
|
|
|
|
bom_end_ = UTF8_BOM.size();
|
|
|
|
encoding_ = Encoding::UTF_8;
|
2018-10-24 07:48:06 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-14 07:24:43 +08:00
|
|
|
// Returns everything that precedes the last '/' in `path`.
// A path that contains no '/' at all is returned unchanged, and a path
// whose only '/' is its first character yields the empty string.
std::string DirectoryName(std::string path) {
  const std::string::size_type slashAt{path.rfind('/')};
  if (slashAt == std::string::npos) {
    return path;
  }
  return path.substr(0, slashAt);
}
|
|
|
|
|
|
|
|
std::string LocateSourceFile(
|
|
|
|
std::string name, const std::vector<std::string> &searchPath) {
|
|
|
|
if (name.empty() || name == "-" || name[0] == '/') {
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
for (const std::string &dir : searchPath) {
|
|
|
|
std::string path{dir + '/' + name};
|
2020-02-27 21:42:56 +08:00
|
|
|
bool isDir{false};
|
|
|
|
auto er = llvm::sys::fs::is_directory(path, isDir);
|
|
|
|
if (!er && !isDir) {
|
2018-02-14 07:24:43 +08:00
|
|
|
return path;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
2020-02-27 21:42:56 +08:00
|
|
|
// Deletes every '\r' byte from `buf` in place by sliding the remaining
// bytes toward the front, and returns the number of bytes that are kept.
// Bytes past the returned count are left with whatever they held.
std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
  char *const base{buf.data()};
  char *dest{base}; // where the next kept byte goes
  char *src{base}; // next unread byte
  std::size_t remaining{buf.size()};
  while (remaining > 0) {
    auto *cr{static_cast<char *>(
        std::memchr(static_cast<void *>(src), '\r', remaining))};
    // Length of the CR-free run that starts at src.
    const std::size_t run{
        cr ? static_cast<std::size_t>(cr - src) : remaining};
    // memmove, not memcpy: source and destination may overlap.
    std::memmove(dest, src, run);
    dest += run;
    if (!cr) {
      break;
    }
    // Step over the run and the carriage return that ended it.
    src += run + 1;
    remaining -= run + 1;
  }
  return static_cast<std::size_t>(dest - base);
}
|
|
|
|
|
2020-02-28 23:11:03 +08:00
|
|
|
bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
|
2018-01-31 03:55:03 +08:00
|
|
|
Close();
|
|
|
|
path_ = path;
|
2020-02-27 21:42:56 +08:00
|
|
|
std::string errorPath{"'"s + path_ + "'"};
|
|
|
|
auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
|
|
|
|
if (!bufOr) {
|
|
|
|
auto err = bufOr.getError();
|
|
|
|
error << "Could not open " << errorPath << ": " << err.message();
|
2018-04-07 01:34:59 +08:00
|
|
|
return false;
|
2018-01-31 03:55:03 +08:00
|
|
|
}
|
2020-02-27 21:42:56 +08:00
|
|
|
buf_ = std::move(bufOr.get());
|
|
|
|
ReadFile();
|
|
|
|
return true;
|
2018-04-07 01:34:59 +08:00
|
|
|
}
|
|
|
|
|
2020-02-28 23:11:03 +08:00
|
|
|
// Reads standard input into a freshly allocated writable buffer and
// prepares it via ReadFile().  The path is recorded as "standard input".
// Returns true on success; on failure the error's message is written to
// `error` and false is returned.
bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
  Close();
  path_ = "standard input";

  auto stdinOr{llvm::MemoryBuffer::getSTDIN()};
  if (stdinOr) {
    const auto captured{std::move(stdinOr.get())};
    // Copy into a writable buffer of the same size.
    buf_ = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
        captured->getBufferSize());
    llvm::copy(captured->getBuffer(), buf_->getBufferStart());
    ReadFile();
    return true;
  }
  error << stdinOr.getError().message();
  return false;
}
|
2018-01-31 03:55:03 +08:00
|
|
|
|
2020-02-27 21:42:56 +08:00
|
|
|
void SourceFile::ReadFile() {
|
|
|
|
buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
|
2020-04-22 02:45:43 +08:00
|
|
|
if (content().size() == 0 || content().back() != '\n') {
|
|
|
|
// Don't bother to copy if we have spare memory
|
|
|
|
if (content().size() >= buf_->getBufferSize()) {
|
|
|
|
auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
|
|
|
|
content().size() + 1)};
|
|
|
|
llvm::copy(content(), tmp_buf->getBufferStart());
|
|
|
|
Close();
|
|
|
|
buf_ = std::move(tmp_buf);
|
|
|
|
}
|
|
|
|
buf_end_++;
|
|
|
|
buf_->getBuffer()[buf_end_ - 1] = '\n';
|
|
|
|
}
|
2019-12-07 01:37:07 +08:00
|
|
|
IdentifyPayload();
|
|
|
|
RecordLineStarts();
|
2018-01-31 03:55:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void SourceFile::Close() {
|
|
|
|
path_.clear();
|
2020-02-27 21:42:56 +08:00
|
|
|
buf_.reset();
|
2018-01-31 03:55:03 +08:00
|
|
|
}
|
2018-02-08 05:18:36 +08:00
|
|
|
|
2019-09-06 01:05:45 +08:00
|
|
|
// Converts the byte offset `at` into a position with a 1-based line and
// a 1-based column.  `at` must be less than bytes() (enforced by CHECK).
SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
  CHECK(at < bytes());

  // upper_bound finds the first line start beyond `at`; the line that
  // holds `at` is the entry just before it.
  const auto nextLine{llvm::upper_bound(lineStart_, at)};
  const auto lineIndex{std::distance(lineStart_.begin(), nextLine) - 1};
  const int line{static_cast<int>(lineIndex + 1)};
  const int column{static_cast<int>(at - lineStart_[lineIndex] + 1)};
  return {*this, line, column};
}
|
2020-03-29 12:00:16 +08:00
|
|
|
} // namespace Fortran::parser
|