[JSON] Facility to track position within an object and report errors.

This error model should be rich enough for most applications. It comprises:

- a name for the root object, so the user knows what we're parsing
- a path from the root object to the JSON node most associated with the error
- a local error message

This can be presented as an llvm::Error e.g.
  "expected string at ConfigFile.credentials[0].username"

It's designed to be cheap: Paths are a linked list of lightweight
objects on the stack. No heap allocations unless errors are encountered.

A subsequent commit will make use of this in the JSON-to-object
translation facilities: fromJSON and ObjectMapper.
However it's independent of these and can be used for e.g. validation alone.

Another subsequent commit will support showing the error in its context
within the parsed value.

Differential Revision: https://reviews.llvm.org/D88103
This commit is contained in:
Sam McCall 2020-09-24 00:01:45 +02:00
parent 8f2c31f22b
commit 16619e7139
3 changed files with 117 additions and 0 deletions

View File

@ -557,6 +557,75 @@ inline bool Object::erase(StringRef K) {
return M.erase(ObjectKey(K));
}
/// A "cursor" marking a position within a Value.
/// The Value is a tree, and this is the path from the root to the current node.
/// This is used to associate errors with particular subobjects.
///
/// A derived Path stores a pointer to the Path it was derived from, and a
/// field segment stores a pointer to the field name's characters rather than
/// a copy. Both the parent Path and the field name must therefore stay alive
/// for as long as the derived Path is used.
class Path {
public:
  class Root;

  /// Records that the value at the current path is invalid.
  /// Message is e.g. "expected number" and becomes part of the final error.
  /// This overwrites any previously written error message in the root.
  void report(llvm::StringLiteral Message);

  /// The root may be treated as a Path.
  Path(Root &R) : Parent(nullptr), Seg(&R) {}
  /// Derives a path for an array element: this[Index]
  Path index(unsigned Index) const { return Path(this, Segment(Index)); }
  /// Derives a path for an object field: this.Field
  Path field(StringRef Field) const { return Path(this, Segment(Field)); }

private:
  /// One element in a JSON path: an object field (.foo) or array index [27].
  /// Exception: the root Path encodes a pointer to the Path::Root.
  ///
  /// Encoding:
  ///  - field: Pointer = address of the name's first character (never null),
  ///           Offset  = length of the name
  ///  - index: Pointer = 0, Offset = the array index
  ///  - root:  Pointer = address of the Path::Root, Offset = 0 (unused)
  class Segment {
    // Default member initializers make sure no constructor leaves a member
    // indeterminate; segments are copied around by value.
    uintptr_t Pointer = 0;
    unsigned Offset = 0;

  public:
    Segment() = default;
    Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
    // A StringRef may carry a null data pointer (with size 0). A null Pointer
    // means "array index", so substitute a non-null empty string to keep such
    // a name recognizable as a field.
    Segment(llvm::StringRef Field)
        : Pointer(reinterpret_cast<uintptr_t>(Field.data() ? Field.data()
                                                           : "")),
          Offset(static_cast<unsigned>(Field.size())) {}
    Segment(unsigned Index) : Pointer(0), Offset(Index) {}

    /// True for a field segment, false for an array index.
    /// Not meaningful for the root segment.
    bool isField() const { return Pointer != 0; }
    /// The field name. Only meaningful if this is a field segment.
    StringRef field() const {
      return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
    }
    /// The array index. Only meaningful if this is an index segment.
    unsigned index() const { return Offset; }
    /// The root object. Only meaningful for the segment of a root Path.
    Root *root() const { return reinterpret_cast<Root *>(Pointer); }
  };

  /// The Path this one was derived from, or null if this is the root Path.
  const Path *Parent;
  /// The last step of this path (or the Root pointer, for the root Path).
  Segment Seg;

  Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
};
/// The root is the trivial Path to the root value.
/// It also stores the latest reported error and the path where it occurred.
class Path::Root {
public:
  /// Creates a root with an optional human-readable name, which getError()
  /// includes in its message when non-empty. Starts out with no error.
  Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}

  // Paths hold the address of their Root, so a Root must never be copied or
  // moved: both operations are disabled.
  Root(const Root &) = delete;
  Root &operator=(const Root &) = delete;
  Root(Root &&) = delete;
  Root &operator=(Root &&) = delete;

  /// Returns the last error reported, or else a generic error.
  Error getError() const;

private:
  // Path::report() writes the error message and path directly.
  friend void Path::report(llvm::StringLiteral Message);

  /// Name of the root object, used when rendering the error.
  llvm::StringRef Name;
  /// Message of the most recent report(); empty if nothing was reported.
  llvm::StringLiteral ErrorMessage;
  /// Segments of the failing path, stored leaf-first (i.e. reversed).
  /// Only valid in error state.
  std::vector<Path::Segment> ErrorPath;
};
// Standard deserializers are provided for primitive types.
// See comments on Value.
inline bool fromJSON(const Value &E, std::string &Out) {

View File

@ -7,7 +7,9 @@
//===---------------------------------------------------------------------===//
#include "llvm/Support/JSON.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@ -199,6 +201,40 @@ bool operator==(const Value &L, const Value &R) {
llvm_unreachable("Unknown value kind");
}
/// Stores Msg and the path leading to this node in the Root, replacing
/// whatever error was recorded there before.
void Path::report(llvm::StringLiteral Msg) {
  // Climb the parent chain until we hit the root Path (the one without a
  // parent), counting the segments passed on the way.
  unsigned Depth = 0;
  const Path *Cursor = this;
  while (Cursor->Parent != nullptr) {
    ++Depth;
    Cursor = Cursor->Parent;
  }
  // The root Path's segment holds the pointer to the Root object.
  Path::Root *Top = Cursor->Seg.root();

  Top->ErrorMessage = Msg;

  // Second climb: copy the segments leaf-first, so the stored path is in
  // reverse order. The root Path's own segment is not included.
  auto &Stored = Top->ErrorPath;
  Stored.clear();
  Stored.reserve(Depth);
  for (Cursor = this; Cursor->Parent != nullptr; Cursor = Cursor->Parent)
    Stored.push_back(Cursor->Seg);
}
/// Renders the stored error as an llvm::Error.
/// The text is "<message> at <name><path>" when a path was recorded, else
/// "<message> when parsing <name>" (or just "<message>" for an unnamed root).
/// A generic message is used if none was reported.
Error Path::Root::getError() const {
  std::string Text;
  raw_string_ostream Stream(Text);

  // Message part, with a fallback when nothing has been reported.
  if (ErrorMessage.empty())
    Stream << "invalid JSON contents";
  else
    Stream << ErrorMessage;

  if (!ErrorPath.empty()) {
    // Location part: root name followed by each segment, root-to-leaf.
    Stream << " at ";
    if (Name.empty())
      Stream << "(root)";
    else
      Stream << Name;
    // ErrorPath is stored leaf-first, so walk it backwards.
    for (auto It = ErrorPath.rbegin(), End = ErrorPath.rend(); It != End;
         ++It) {
      if (It->isField())
        Stream << '.' << It->field();
      else
        Stream << '[' << It->index() << ']';
    }
  } else if (!Name.empty()) {
    // The error is at the root itself: just mention what was being parsed.
    Stream << " when parsing " << Name;
  }

  return createStringError(llvm::inconvertibleErrorCode(), Stream.str());
}
namespace {
// Simple recursive-descent JSON parser.
class Parser {

View File

@ -8,6 +8,7 @@
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@ -461,6 +462,17 @@ TEST(JSONTest, Stream) {
EXPECT_EQ(Pretty, StreamStuff(2));
}
// Checks that Path::report() records message and location in the Root, that
// a later report replaces an earlier one, and how getError() renders each.
TEST(JSONTest, Path) {
  Path::Root Base("foo");
  Path Top = Base;
  Path UnderA = Top.field("a");
  Path UnderB = Top.field("b");

  // Mixed field/index path several levels deep.
  Path UnderA1C = UnderA.index(1).field("c");
  UnderA1C.index(2).report("boom");
  EXPECT_THAT_ERROR(Base.getError(),
                    FailedWithMessage("boom at foo.a[1].c[2]"));

  // A second report on a different branch replaces the first one.
  UnderB.field("d").field("e").report("bam");
  EXPECT_THAT_ERROR(Base.getError(), FailedWithMessage("bam at foo.b.d.e"));

  // Reporting on the root itself yields the "when parsing" form.
  Top.report("oh no");
  EXPECT_THAT_ERROR(Base.getError(),
                    FailedWithMessage("oh no when parsing foo"));
}
} // namespace
} // namespace json
} // namespace llvm