1130 lines
35 KiB
C++
1130 lines
35 KiB
C++
/*
|
|
* FDBTypes.h
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef FDBCLIENT_FDBTYPES_H
|
|
#define FDBCLIENT_FDBTYPES_H
|
|
|
|
#include <algorithm>
|
|
#include <set>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <unordered_set>
|
|
|
|
#include "flow/Arena.h"
|
|
#include "flow/flow.h"
|
|
|
|
typedef int64_t Version;
|
|
typedef uint64_t LogEpoch;
|
|
typedef uint64_t Sequence;
|
|
typedef StringRef KeyRef;
|
|
typedef StringRef ValueRef;
|
|
typedef int64_t Generation;
|
|
typedef UID SpanID;
|
|
|
|
enum {
|
|
tagLocalitySpecial = -1, // tag with this locality means it is invalidTag (id=0), txsTag (id=1), or cacheTag (id=2)
|
|
tagLocalityLogRouter = -2,
|
|
tagLocalityRemoteLog = -3, // tag created by log router for remote (aka. not in Primary DC) tLogs
|
|
tagLocalityUpgraded = -4, // tlogs with old log format
|
|
tagLocalitySatellite = -5,
|
|
tagLocalityLogRouterMapped = -6, // The pseudo tag used by log routers to pop the real LogRouter tag (i.e., -2)
|
|
tagLocalityTxs = -7,
|
|
tagLocalityBackup = -8, // used by backup role to pop from TLogs
|
|
tagLocalityInvalid = -99
|
|
}; // The TLog and LogRouter require these number to be as compact as possible
|
|
|
|
inline bool isPseudoLocality(int8_t locality) {
|
|
return locality == tagLocalityLogRouterMapped || locality == tagLocalityBackup;
|
|
}
|
|
|
|
#pragma pack(push, 1)
|
|
struct Tag {
|
|
// if locality > 0,
|
|
// locality decides which DC id the tLog is in;
|
|
// id decides which SS owns the tag; id <-> SS mapping is in the system keyspace: serverTagKeys.
|
|
// if locality < 0, locality decides the type of tLog set: satellite, LR, or remote tLog, etc.
|
|
// id decides which tLog in the tLog type will be used.
|
|
int8_t locality;
|
|
uint16_t id;
|
|
|
|
Tag() : locality(tagLocalitySpecial), id(0) {}
|
|
Tag(int8_t locality, uint16_t id) : locality(locality), id(id) {}
|
|
|
|
bool operator==(const Tag& r) const { return locality == r.locality && id == r.id; }
|
|
bool operator!=(const Tag& r) const { return locality != r.locality || id != r.id; }
|
|
bool operator<(const Tag& r) const { return locality < r.locality || (locality == r.locality && id < r.id); }
|
|
|
|
int toTagDataIndex() const { return locality >= 0 ? 2 * locality : 1 - (2 * locality); }
|
|
|
|
std::string toString() const { return format("%d:%d", locality, id); }
|
|
|
|
template <class Ar>
|
|
force_inline void serialize_unversioned(Ar& ar) {
|
|
serializer(ar, locality, id);
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct flow_ref<Tag> : std::integral_constant<bool, false> {};
|
|
|
|
#pragma pack(pop)
|
|
|
|
template <class Ar>
|
|
void load(Ar& ar, Tag& tag) {
|
|
tag.serialize_unversioned(ar);
|
|
}
|
|
template <class Ar>
|
|
void save(Ar& ar, Tag const& tag) {
|
|
const_cast<Tag&>(tag).serialize_unversioned(ar);
|
|
}
|
|
|
|
template <>
|
|
struct struct_like_traits<Tag> : std::true_type {
|
|
using Member = Tag;
|
|
using types = pack<uint16_t, int8_t>;
|
|
|
|
template <int i, class Context>
|
|
static const index_t<i, types>& get(const Member& m, Context&) {
|
|
if constexpr (i == 0) {
|
|
return m.id;
|
|
} else {
|
|
static_assert(i == 1);
|
|
return m.locality;
|
|
}
|
|
}
|
|
|
|
template <int i, class Type, class Context>
|
|
static void assign(Member& m, const Type& t, Context&) {
|
|
if constexpr (i == 0) {
|
|
m.id = t;
|
|
} else {
|
|
static_assert(i == 1);
|
|
m.locality = t;
|
|
}
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct Traceable<Tag> : std::true_type {
|
|
static std::string toString(const Tag& value) { return value.toString(); }
|
|
};
|
|
|
|
static const Tag invalidTag{ tagLocalitySpecial, 0 };
|
|
static const Tag txsTag{ tagLocalitySpecial, 1 };
|
|
static const Tag cacheTag{ tagLocalitySpecial, 2 };
|
|
|
|
enum { txsTagOld = -1, invalidTagOld = -100 };
|
|
|
|
struct TagsAndMessage {
|
|
StringRef message;
|
|
VectorRef<Tag> tags;
|
|
|
|
TagsAndMessage() {}
|
|
TagsAndMessage(StringRef message, VectorRef<Tag> tags) : message(message), tags(tags) {}
|
|
|
|
// Loads tags and message from a serialized buffer. "rd" is checkpointed at
|
|
// its begining position to allow the caller to rewind if needed.
|
|
// T can be ArenaReader or BinaryReader.
|
|
template <class T>
|
|
void loadFromArena(T* rd, uint32_t* messageVersionSub) {
|
|
int32_t messageLength;
|
|
uint16_t tagCount;
|
|
uint32_t sub;
|
|
|
|
rd->checkpoint();
|
|
*rd >> messageLength >> sub >> tagCount;
|
|
if (messageVersionSub)
|
|
*messageVersionSub = sub;
|
|
tags = VectorRef<Tag>((Tag*)rd->readBytes(tagCount * sizeof(Tag)), tagCount);
|
|
const int32_t rawLength = messageLength + sizeof(messageLength);
|
|
rd->rewind();
|
|
rd->checkpoint();
|
|
message = StringRef((const uint8_t*)rd->readBytes(rawLength), rawLength);
|
|
}
|
|
|
|
// Returns the size of the header, including: msg_length, version.sub, tag_count, tags.
|
|
int32_t getHeaderSize() const {
|
|
return sizeof(int32_t) + sizeof(uint32_t) + sizeof(uint16_t) + tags.size() * sizeof(Tag);
|
|
}
|
|
|
|
StringRef getMessageWithoutTags() const { return message.substr(getHeaderSize()); }
|
|
|
|
// Returns the message with the header.
|
|
StringRef getRawMessage() const { return message; }
|
|
};
|
|
|
|
struct KeyRangeRef;
|
|
struct KeyValueRef;
|
|
|
|
template <class Collection>
|
|
void uniquify(Collection& c) {
|
|
std::sort(c.begin(), c.end());
|
|
c.resize(std::unique(c.begin(), c.end()) - c.begin());
|
|
}
|
|
|
|
inline std::string describe(const Tag item) {
|
|
return format("%d:%d", item.locality, item.id);
|
|
}
|
|
|
|
inline std::string describe(const int item) {
|
|
return format("%d", item);
|
|
}
|
|
|
|
// Allows describeList to work on a vector of std::string
|
|
static std::string describe(const std::string& s) {
|
|
return s;
|
|
}
|
|
|
|
template <class T>
|
|
std::string describe(Reference<T> const& item) {
|
|
return item->toString();
|
|
}
|
|
|
|
static std::string describe(UID const& item) {
|
|
return item.shortString();
|
|
}
|
|
|
|
template <class T>
|
|
std::string describe(T const& item) {
|
|
return item.toString();
|
|
}
|
|
|
|
template <class K, class V>
|
|
std::string describe(std::map<K, V> const& items, int max_items = -1) {
|
|
if (!items.size())
|
|
return "[no items]";
|
|
|
|
std::string s;
|
|
int count = 0;
|
|
for (auto it = items.begin(); it != items.end(); it++) {
|
|
if (++count > max_items && max_items >= 0)
|
|
break;
|
|
if (count > 1)
|
|
s += ",";
|
|
s += describe(it->first) + "=>" + describe(it->second);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
template <class T>
|
|
std::string describeList(T const& items, int max_items) {
|
|
if (!items.size())
|
|
return "[no items]";
|
|
|
|
std::string s;
|
|
int count = 0;
|
|
for (auto const& item : items) {
|
|
if (++count > max_items && max_items >= 0)
|
|
break;
|
|
if (count > 1)
|
|
s += ",";
|
|
s += describe(item);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
template <class T>
|
|
std::string describe(std::vector<T> const& items, int max_items = -1) {
|
|
return describeList(items, max_items);
|
|
}
|
|
|
|
template <class T>
|
|
std::string describe(std::unordered_set<T> const& items, int max_items = -1) {
|
|
return describeList(items, max_items);
|
|
}
|
|
|
|
template <typename T>
|
|
struct Traceable<std::vector<T>> : std::true_type {
|
|
static std::string toString(const std::vector<T>& value) { return describe(value); }
|
|
};
|
|
|
|
template <class T>
|
|
std::string describe(std::set<T> const& items, int max_items = -1) {
|
|
return describeList(items, max_items);
|
|
}
|
|
|
|
template <typename T>
|
|
struct Traceable<std::set<T>> : std::true_type {
|
|
static std::string toString(const std::set<T>& value) { return describe(value); }
|
|
};
|
|
|
|
std::string printable(const StringRef& val);
|
|
std::string printable(const std::string& val);
|
|
std::string printable(const KeyRangeRef& range);
|
|
std::string printable(const VectorRef<KeyRangeRef>& val);
|
|
std::string printable(const VectorRef<StringRef>& val);
|
|
std::string printable(const VectorRef<KeyValueRef>& val);
|
|
std::string printable(const KeyValueRef& val);
|
|
|
|
template <class T>
|
|
std::string printable(const Optional<T>& val) {
|
|
if (val.present())
|
|
return printable(val.get());
|
|
return "[not set]";
|
|
}
|
|
|
|
inline bool equalsKeyAfter(const KeyRef& key, const KeyRef& compareKey) {
|
|
if (key.size() + 1 != compareKey.size() || compareKey[compareKey.size() - 1] != 0)
|
|
return false;
|
|
return compareKey.startsWith(key);
|
|
}
|
|
|
|
struct KeyRangeRef {
|
|
const KeyRef begin, end;
|
|
KeyRangeRef() {}
|
|
KeyRangeRef(const KeyRef& begin, const KeyRef& end) : begin(begin), end(end) {
|
|
if (begin > end) {
|
|
TraceEvent("InvertedRange").detail("Begin", begin).detail("End", end);
|
|
throw inverted_range();
|
|
}
|
|
}
|
|
KeyRangeRef(Arena& a, const KeyRangeRef& copyFrom) : begin(a, copyFrom.begin), end(a, copyFrom.end) {}
|
|
bool operator==(const KeyRangeRef& r) const { return begin == r.begin && end == r.end; }
|
|
bool operator!=(const KeyRangeRef& r) const { return begin != r.begin || end != r.end; }
|
|
bool contains(const KeyRef& key) const { return begin <= key && key < end; }
|
|
bool contains(const KeyRangeRef& keys) const { return begin <= keys.begin && keys.end <= end; }
|
|
bool intersects(const KeyRangeRef& keys) const { return begin < keys.end && keys.begin < end; }
|
|
bool intersects(const VectorRef<KeyRangeRef>& keysVec) const {
|
|
for (const auto& keys : keysVec) {
|
|
if (intersects(keys)) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
bool empty() const { return begin == end; }
|
|
bool singleKeyRange() const { return equalsKeyAfter(begin, end); }
|
|
|
|
Standalone<KeyRangeRef> withPrefix(const StringRef& prefix) const {
|
|
return KeyRangeRef(begin.withPrefix(prefix), end.withPrefix(prefix));
|
|
}
|
|
|
|
KeyRangeRef withPrefix(const StringRef& prefix, Arena& arena) const {
|
|
return KeyRangeRef(begin.withPrefix(prefix, arena), end.withPrefix(prefix, arena));
|
|
}
|
|
|
|
KeyRangeRef removePrefix(const StringRef& prefix) const {
|
|
return KeyRangeRef(begin.removePrefix(prefix), end.removePrefix(prefix));
|
|
}
|
|
|
|
const KeyRangeRef& operator=(const KeyRangeRef& rhs) {
|
|
const_cast<KeyRef&>(begin) = rhs.begin;
|
|
const_cast<KeyRef&>(end) = rhs.end;
|
|
return *this;
|
|
}
|
|
|
|
int expectedSize() const { return begin.expectedSize() + end.expectedSize(); }
|
|
|
|
template <class Ar>
|
|
force_inline void serialize(Ar& ar) {
|
|
if (!ar.isDeserializing && equalsKeyAfter(begin, end)) {
|
|
StringRef empty;
|
|
serializer(ar, const_cast<KeyRef&>(end), empty);
|
|
} else {
|
|
serializer(ar, const_cast<KeyRef&>(begin), const_cast<KeyRef&>(end));
|
|
}
|
|
if (ar.isDeserializing && end == StringRef() && begin != StringRef()) {
|
|
ASSERT(begin[begin.size() - 1] == '\x00');
|
|
const_cast<KeyRef&>(end) = begin;
|
|
const_cast<KeyRef&>(begin) = end.substr(0, end.size() - 1);
|
|
}
|
|
|
|
if (begin > end) {
|
|
TraceEvent("InvertedRange").detail("Begin", begin).detail("End", end);
|
|
throw inverted_range();
|
|
};
|
|
}
|
|
|
|
struct ArbitraryOrder {
|
|
bool operator()(KeyRangeRef const& a, KeyRangeRef const& b) const {
|
|
if (a.begin < b.begin)
|
|
return true;
|
|
if (a.begin > b.begin)
|
|
return false;
|
|
return a.end < b.end;
|
|
}
|
|
};
|
|
|
|
std::string toString() const { return "Begin:" + begin.printable() + "End:" + end.printable(); }
|
|
};
|
|
|
|
template <>
|
|
struct Traceable<KeyRangeRef> : std::true_type {
|
|
static std::string toString(const KeyRangeRef& value) {
|
|
auto begin = Traceable<StringRef>::toString(value.begin);
|
|
auto end = Traceable<StringRef>::toString(value.end);
|
|
std::string result;
|
|
result.reserve(begin.size() + end.size() + 3);
|
|
std::copy(begin.begin(), begin.end(), std::back_inserter(result));
|
|
result.push_back(' ');
|
|
result.push_back('-');
|
|
result.push_back(' ');
|
|
std::copy(end.begin(), end.end(), std::back_inserter(result));
|
|
return result;
|
|
}
|
|
};
|
|
|
|
inline KeyRangeRef operator&(const KeyRangeRef& lhs, const KeyRangeRef& rhs) {
|
|
KeyRef b = std::max(lhs.begin, rhs.begin), e = std::min(lhs.end, rhs.end);
|
|
if (e < b)
|
|
return KeyRangeRef();
|
|
return KeyRangeRef(b, e);
|
|
}
|
|
|
|
struct KeyValueRef {
|
|
KeyRef key;
|
|
ValueRef value;
|
|
KeyValueRef() {}
|
|
KeyValueRef(const KeyRef& key, const ValueRef& value) : key(key), value(value) {}
|
|
KeyValueRef(Arena& a, const KeyValueRef& copyFrom) : key(a, copyFrom.key), value(a, copyFrom.value) {}
|
|
bool operator==(const KeyValueRef& r) const { return key == r.key && value == r.value; }
|
|
bool operator!=(const KeyValueRef& r) const { return key != r.key || value != r.value; }
|
|
|
|
int expectedSize() const { return key.expectedSize() + value.expectedSize(); }
|
|
|
|
template <class Ar>
|
|
force_inline void serialize(Ar& ar) {
|
|
serializer(ar, key, value);
|
|
}
|
|
|
|
struct OrderByKey {
|
|
bool operator()(KeyValueRef const& a, KeyValueRef const& b) const { return a.key < b.key; }
|
|
template <class T>
|
|
bool operator()(T const& a, KeyValueRef const& b) const {
|
|
return a < b.key;
|
|
}
|
|
template <class T>
|
|
bool operator()(KeyValueRef const& a, T const& b) const {
|
|
return a.key < b;
|
|
}
|
|
};
|
|
|
|
struct OrderByKeyBack {
|
|
bool operator()(KeyValueRef const& a, KeyValueRef const& b) const { return a.key > b.key; }
|
|
template <class T>
|
|
bool operator()(T const& a, KeyValueRef const& b) const {
|
|
return a > b.key;
|
|
}
|
|
template <class T>
|
|
bool operator()(KeyValueRef const& a, T const& b) const {
|
|
return a.key > b;
|
|
}
|
|
};
|
|
};
|
|
|
|
template <>
|
|
struct string_serialized_traits<KeyValueRef> : std::true_type {
|
|
int32_t getSize(const KeyValueRef& item) const {
|
|
return 2 * sizeof(uint32_t) + item.key.size() + item.value.size();
|
|
}
|
|
|
|
uint32_t save(uint8_t* out, const KeyValueRef& item) const {
|
|
auto begin = out;
|
|
uint32_t sz = item.key.size();
|
|
*reinterpret_cast<decltype(sz)*>(out) = sz;
|
|
out += sizeof(sz);
|
|
memcpy(out, item.key.begin(), sz);
|
|
out += sz;
|
|
sz = item.value.size();
|
|
*reinterpret_cast<decltype(sz)*>(out) = sz;
|
|
out += sizeof(sz);
|
|
memcpy(out, item.value.begin(), sz);
|
|
out += sz;
|
|
return out - begin;
|
|
}
|
|
|
|
template <class Context>
|
|
uint32_t load(const uint8_t* data, KeyValueRef& t, Context& context) {
|
|
auto begin = data;
|
|
uint32_t sz;
|
|
memcpy(&sz, data, sizeof(sz));
|
|
data += sizeof(sz);
|
|
t.key = StringRef(context.tryReadZeroCopy(data, sz), sz);
|
|
data += sz;
|
|
memcpy(&sz, data, sizeof(sz));
|
|
data += sizeof(sz);
|
|
t.value = StringRef(context.tryReadZeroCopy(data, sz), sz);
|
|
data += sz;
|
|
return data - begin;
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct Traceable<KeyValueRef> : std::true_type {
|
|
static std::string toString(const KeyValueRef& value) {
|
|
return Traceable<KeyRef>::toString(value.key) + format(":%d", value.value.size());
|
|
}
|
|
};
|
|
|
|
using Key = Standalone<KeyRef>;
|
|
using Value = Standalone<ValueRef>;
|
|
using KeyRange = Standalone<KeyRangeRef>;
|
|
using KeyValue = Standalone<KeyValueRef>;
|
|
using KeySelector = Standalone<struct KeySelectorRef>;
|
|
using RangeResult = Standalone<struct RangeResultRef>;
|
|
|
|
enum { invalidVersion = -1, latestVersion = -2, MAX_VERSION = std::numeric_limits<int64_t>::max() };
|
|
|
|
inline Key keyAfter(const KeyRef& key) {
|
|
if (key == LiteralStringRef("\xff\xff"))
|
|
return key;
|
|
|
|
Standalone<StringRef> r;
|
|
uint8_t* s = new (r.arena()) uint8_t[key.size() + 1];
|
|
if (key.size() > 0) {
|
|
memcpy(s, key.begin(), key.size());
|
|
}
|
|
s[key.size()] = 0;
|
|
((StringRef&)r) = StringRef(s, key.size() + 1);
|
|
return r;
|
|
}
|
|
inline KeyRef keyAfter(const KeyRef& key, Arena& arena) {
|
|
if (key == LiteralStringRef("\xff\xff"))
|
|
return key;
|
|
uint8_t* t = new (arena) uint8_t[key.size() + 1];
|
|
memcpy(t, key.begin(), key.size());
|
|
t[key.size()] = 0;
|
|
return KeyRef(t, key.size() + 1);
|
|
}
|
|
inline KeyRange singleKeyRange(const KeyRef& a) {
|
|
return KeyRangeRef(a, keyAfter(a));
|
|
}
|
|
inline KeyRangeRef singleKeyRange(KeyRef const& key, Arena& arena) {
|
|
uint8_t* t = new (arena) uint8_t[key.size() + 1];
|
|
memcpy(t, key.begin(), key.size());
|
|
t[key.size()] = 0;
|
|
return KeyRangeRef(KeyRef(t, key.size()), KeyRef(t, key.size() + 1));
|
|
}
|
|
inline KeyRange prefixRange(KeyRef prefix) {
|
|
Standalone<KeyRangeRef> range;
|
|
KeyRef start = KeyRef(range.arena(), prefix);
|
|
KeyRef end = strinc(prefix, range.arena());
|
|
range.contents() = KeyRangeRef(start, end);
|
|
return range;
|
|
}
|
|
|
|
// Returns (one of) the shortest key(s) either contained in keys or equal to keys.end,
|
|
// assuming its length is no more than CLIENT_KNOBS->SPLIT_KEY_SIZE_LIMIT. If the length of
|
|
// the shortest key exceeds that limit, then the end key is returned.
|
|
// The returned reference is valid as long as keys is valid.
|
|
KeyRef keyBetween(const KeyRangeRef& keys);
|
|
|
|
struct KeySelectorRef {
|
|
private:
|
|
KeyRef key; // Find the last item less than key
|
|
|
|
public:
|
|
bool orEqual; // (or equal to key, if this is true)
|
|
int offset; // and then move forward this many items (or backward if negative)
|
|
KeySelectorRef() : orEqual(false), offset(0) {}
|
|
KeySelectorRef(const KeyRef& key, bool orEqual, int offset) : orEqual(orEqual), offset(offset) { setKey(key); }
|
|
|
|
KeySelectorRef(Arena& arena, const KeySelectorRef& copyFrom)
|
|
: key(arena, copyFrom.key), orEqual(copyFrom.orEqual), offset(copyFrom.offset) {}
|
|
int expectedSize() const { return key.expectedSize(); }
|
|
|
|
void removeOrEqual(Arena& arena) {
|
|
if (orEqual) {
|
|
setKey(keyAfter(key, arena));
|
|
orEqual = false;
|
|
}
|
|
}
|
|
|
|
KeyRef getKey() const { return key; }
|
|
|
|
void setKey(KeyRef const& key);
|
|
|
|
std::string toString() const;
|
|
|
|
bool isBackward() const {
|
|
return !orEqual && offset <= 0;
|
|
} // True if the resolution of the KeySelector depends only on keys less than key
|
|
bool isFirstGreaterOrEqual() const { return !orEqual && offset == 1; }
|
|
bool isFirstGreaterThan() const { return orEqual && offset == 1; }
|
|
bool isLastLessOrEqual() const { return orEqual && offset == 0; }
|
|
|
|
// True iff, regardless of the contents of the database, lhs must resolve to a key > rhs
|
|
bool isDefinitelyGreater(KeyRef const& k) { return offset >= 1 && (isFirstGreaterOrEqual() ? key > k : key >= k); }
|
|
// True iff, regardless of the contents of the database, lhs must resolve to a key < rhs
|
|
bool isDefinitelyLess(KeyRef const& k) { return offset <= 0 && (isLastLessOrEqual() ? key < k : key <= k); }
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, key, orEqual, offset);
|
|
}
|
|
};
|
|
|
|
inline bool operator==(const KeySelectorRef& lhs, const KeySelectorRef& rhs) {
|
|
return lhs.getKey() == rhs.getKey() && lhs.orEqual == rhs.orEqual && lhs.offset == rhs.offset;
|
|
}
|
|
inline KeySelectorRef lastLessThan(const KeyRef& k) {
|
|
return KeySelectorRef(k, false, 0);
|
|
}
|
|
inline KeySelectorRef lastLessOrEqual(const KeyRef& k) {
|
|
return KeySelectorRef(k, true, 0);
|
|
}
|
|
inline KeySelectorRef firstGreaterThan(const KeyRef& k) {
|
|
return KeySelectorRef(k, true, +1);
|
|
}
|
|
inline KeySelectorRef firstGreaterOrEqual(const KeyRef& k) {
|
|
return KeySelectorRef(k, false, +1);
|
|
}
|
|
inline KeySelectorRef operator+(const KeySelectorRef& s, int off) {
|
|
return KeySelectorRef(s.getKey(), s.orEqual, s.offset + off);
|
|
}
|
|
inline KeySelectorRef operator-(const KeySelectorRef& s, int off) {
|
|
return KeySelectorRef(s.getKey(), s.orEqual, s.offset - off);
|
|
}
|
|
inline bool selectorInRange(KeySelectorRef const& sel, KeyRangeRef const& range) {
|
|
// Returns true if the given range suffices to at least begin to resolve the given KeySelectorRef
|
|
return sel.getKey() >= range.begin && (sel.isBackward() ? sel.getKey() <= range.end : sel.getKey() < range.end);
|
|
}
|
|
|
|
template <class Val>
|
|
struct KeyRangeWith : KeyRange {
|
|
Val value;
|
|
KeyRangeWith() {}
|
|
KeyRangeWith(const KeyRangeRef& range, const Val& value) : KeyRange(range), value(value) {}
|
|
bool operator==(const KeyRangeWith& r) const { return KeyRangeRef::operator==(r) && value == r.value; }
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, ((KeyRange&)*this), value);
|
|
}
|
|
};
|
|
template <class Val>
|
|
KeyRangeWith<Val> keyRangeWith(const KeyRangeRef& range, const Val& value) {
|
|
return KeyRangeWith<Val>(range, value);
|
|
}
|
|
|
|
struct GetRangeLimits {
|
|
enum { ROW_LIMIT_UNLIMITED = -1, BYTE_LIMIT_UNLIMITED = -1 };
|
|
|
|
int rows;
|
|
int minRows;
|
|
int bytes;
|
|
|
|
GetRangeLimits() : rows(ROW_LIMIT_UNLIMITED), minRows(1), bytes(BYTE_LIMIT_UNLIMITED) {}
|
|
explicit GetRangeLimits(int rowLimit) : rows(rowLimit), minRows(1), bytes(BYTE_LIMIT_UNLIMITED) {}
|
|
GetRangeLimits(int rowLimit, int byteLimit) : rows(rowLimit), minRows(1), bytes(byteLimit) {}
|
|
|
|
void decrement(VectorRef<KeyValueRef> const& data);
|
|
void decrement(KeyValueRef const& data);
|
|
|
|
// True if either the row or byte limit has been reached
|
|
bool isReached();
|
|
|
|
// True if data would cause the row or byte limit to be reached
|
|
bool reachedBy(VectorRef<KeyValueRef> const& data);
|
|
|
|
bool hasByteLimit();
|
|
bool hasRowLimit();
|
|
|
|
bool hasSatisfiedMinRows();
|
|
bool isValid() const {
|
|
return (rows >= 0 || rows == ROW_LIMIT_UNLIMITED) && (bytes >= 0 || bytes == BYTE_LIMIT_UNLIMITED) &&
|
|
minRows >= 0 && (minRows <= rows || rows == ROW_LIMIT_UNLIMITED);
|
|
}
|
|
};
|
|
|
|
struct RangeResultRef : VectorRef<KeyValueRef> {
|
|
bool more; // True if (but not necessarily only if) values remain in the *key* range requested (possibly beyond the
|
|
// limits requested) False implies that no such values remain
|
|
Optional<KeyRef> readThrough; // Only present when 'more' is true. When present, this value represent the end (or
|
|
// beginning if reverse) of the range which was read to produce these results. This is
|
|
// guarenteed to be less than the requested range.
|
|
bool readToBegin;
|
|
bool readThroughEnd;
|
|
|
|
RangeResultRef() : more(false), readToBegin(false), readThroughEnd(false) {}
|
|
RangeResultRef(Arena& p, const RangeResultRef& toCopy)
|
|
: VectorRef<KeyValueRef>(p, toCopy), more(toCopy.more),
|
|
readThrough(toCopy.readThrough.present() ? KeyRef(p, toCopy.readThrough.get()) : Optional<KeyRef>()),
|
|
readToBegin(toCopy.readToBegin), readThroughEnd(toCopy.readThroughEnd) {}
|
|
RangeResultRef(const VectorRef<KeyValueRef>& value, bool more, Optional<KeyRef> readThrough = Optional<KeyRef>())
|
|
: VectorRef<KeyValueRef>(value), more(more), readThrough(readThrough), readToBegin(false), readThroughEnd(false) {
|
|
}
|
|
RangeResultRef(bool readToBegin, bool readThroughEnd)
|
|
: more(false), readToBegin(readToBegin), readThroughEnd(readThroughEnd) {}
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, ((VectorRef<KeyValueRef>&)*this), more, readThrough, readToBegin, readThroughEnd);
|
|
}
|
|
|
|
std::string toString() const {
|
|
return "more:" + std::to_string(more) +
|
|
" readThrough:" + (readThrough.present() ? readThrough.get().toString() : "[unset]") +
|
|
" readToBegin:" + std::to_string(readToBegin) + " readThroughEnd:" + std::to_string(readThroughEnd);
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct Traceable<RangeResultRef> : std::true_type {
|
|
static std::string toString(const RangeResultRef& value) {
|
|
return Traceable<VectorRef<KeyValueRef>>::toString(value);
|
|
}
|
|
};
|
|
|
|
struct KeyValueStoreType {
|
|
constexpr static FileIdentifier file_identifier = 6560359;
|
|
// These enumerated values are stored in the database configuration, so should NEVER be changed.
|
|
// Only add new ones just before END.
|
|
// SS storeType is END before the storageServerInterface is initialized.
|
|
enum StoreType { SSD_BTREE_V1, MEMORY, SSD_BTREE_V2, SSD_REDWOOD_V1, MEMORY_RADIXTREE, SSD_ROCKSDB_V1, END };
|
|
|
|
KeyValueStoreType() : type(END) {}
|
|
KeyValueStoreType(StoreType type) : type(type) {
|
|
if ((uint32_t)type > END)
|
|
this->type = END;
|
|
}
|
|
operator StoreType() const { return StoreType(type); }
|
|
StoreType storeType() const { return StoreType(type); }
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, type);
|
|
}
|
|
|
|
std::string toString() const {
|
|
switch (type) {
|
|
case SSD_BTREE_V1:
|
|
return "ssd-1";
|
|
case SSD_BTREE_V2:
|
|
return "ssd-2";
|
|
case SSD_REDWOOD_V1:
|
|
return "ssd-redwood-experimental";
|
|
case SSD_ROCKSDB_V1:
|
|
return "ssd-rocksdb-experimental";
|
|
case MEMORY:
|
|
return "memory";
|
|
case MEMORY_RADIXTREE:
|
|
return "memory-radixtree-beta";
|
|
default:
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
private:
|
|
uint32_t type;
|
|
};
|
|
|
|
template <>
|
|
struct Traceable<KeyValueStoreType> : std::true_type {
|
|
static std::string toString(KeyValueStoreType const& value) { return value.toString(); }
|
|
};
|
|
|
|
struct TLogVersion {
|
|
enum Version {
|
|
UNSET = 0,
|
|
// Everything between BEGIN and END should be densely packed, so that we
|
|
// can iterate over them easily.
|
|
// V3 was the introduction of spill by reference;
|
|
// V4 changed how data gets written to satellite TLogs so that we can peek from them;
|
|
// V5 merged reference and value spilling
|
|
// V6 added span context to list of serialized mutations sent from proxy to tlogs
|
|
// V1 = 1, // 4.6 is dispatched to via 6.0
|
|
V2 = 2, // 6.0
|
|
V3 = 3, // 6.1
|
|
V4 = 4, // 6.2
|
|
V5 = 5, // 6.3
|
|
V6 = 6, // 7.0
|
|
MIN_SUPPORTED = V2,
|
|
MAX_SUPPORTED = V6,
|
|
MIN_RECRUITABLE = V5,
|
|
DEFAULT = V5,
|
|
} version;
|
|
|
|
TLogVersion() : version(UNSET) {}
|
|
TLogVersion(Version v) : version(v) {}
|
|
|
|
operator Version() const { return version; }
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
uint32_t v = (uint32_t)version;
|
|
serializer(ar, v);
|
|
version = (Version)v;
|
|
}
|
|
|
|
static ErrorOr<TLogVersion> FromStringRef(StringRef s) {
|
|
if (s == LiteralStringRef("2"))
|
|
return V2;
|
|
if (s == LiteralStringRef("3"))
|
|
return V3;
|
|
if (s == LiteralStringRef("4"))
|
|
return V4;
|
|
if (s == LiteralStringRef("5"))
|
|
return V5;
|
|
if (s == LiteralStringRef("6"))
|
|
return V6;
|
|
return default_error_or();
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct Traceable<TLogVersion> : std::true_type {
|
|
static std::string toString(TLogVersion const& value) { return Traceable<Version>::toString(value.version); }
|
|
};
|
|
|
|
struct TLogSpillType {
|
|
// These enumerated values are stored in the database configuration, so can NEVER be changed. Only add new ones
|
|
// just before END.
|
|
enum SpillType {
|
|
UNSET = 0,
|
|
DEFAULT = 2,
|
|
VALUE = 1,
|
|
REFERENCE = 2,
|
|
END = 3,
|
|
};
|
|
|
|
TLogSpillType() : type(DEFAULT) {}
|
|
TLogSpillType(SpillType type) : type(type) {
|
|
if ((uint32_t)type >= END) {
|
|
this->type = UNSET;
|
|
}
|
|
}
|
|
operator SpillType() const { return SpillType(type); }
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, type);
|
|
}
|
|
|
|
std::string toString() const {
|
|
switch (type) {
|
|
case VALUE:
|
|
return "value";
|
|
case REFERENCE:
|
|
return "reference";
|
|
case UNSET:
|
|
return "unset";
|
|
default:
|
|
ASSERT(false);
|
|
}
|
|
return "";
|
|
}
|
|
|
|
static ErrorOr<TLogSpillType> FromStringRef(StringRef s) {
|
|
if (s == LiteralStringRef("1"))
|
|
return VALUE;
|
|
if (s == LiteralStringRef("2"))
|
|
return REFERENCE;
|
|
return default_error_or();
|
|
}
|
|
|
|
uint32_t type;
|
|
};
|
|
|
|
// Contains the amount of free and total space for a storage server, in bytes
|
|
struct StorageBytes {
|
|
// Free space on the filesystem
|
|
int64_t free;
|
|
// Total space on the filesystem
|
|
int64_t total;
|
|
// Used by *this* store, not total - free
|
|
int64_t used;
|
|
// Amount of space available for use by the store, which includes free space on the filesystem
|
|
// and internal free space within the store data that is immediately reusable.
|
|
int64_t available;
|
|
// Amount of space that could eventually be available for use after garbage collection
|
|
int64_t temp;
|
|
|
|
StorageBytes() {}
|
|
StorageBytes(int64_t free, int64_t total, int64_t used, int64_t available, int64_t temp = 0)
|
|
: free(free), total(total), used(used), available(available), temp(temp) {}
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, free, total, used, available);
|
|
}
|
|
|
|
std::string toString() const {
|
|
return format("{%.2f MB total, %.2f MB free, %.2f MB available, %.2f MB used, %.2f MB temp}",
|
|
total / 1e6,
|
|
free / 1e6,
|
|
available / 1e6,
|
|
used / 1e6,
|
|
temp / 1e6);
|
|
}
|
|
};
|
|
struct LogMessageVersion {
|
|
// Each message pushed into the log system has a unique, totally ordered LogMessageVersion
|
|
// See ILogSystem::push() for how these are assigned
|
|
Version version;
|
|
uint32_t sub;
|
|
|
|
void reset(Version v) {
|
|
version = v;
|
|
sub = 0;
|
|
}
|
|
|
|
bool operator<(LogMessageVersion const& r) const {
|
|
if (version < r.version)
|
|
return true;
|
|
if (r.version < version)
|
|
return false;
|
|
return sub < r.sub;
|
|
}
|
|
bool operator>(LogMessageVersion const& r) const { return r < *this; }
|
|
bool operator<=(LogMessageVersion const& r) const { return !(*this > r); }
|
|
bool operator>=(LogMessageVersion const& r) const { return !(*this < r); }
|
|
|
|
bool operator==(LogMessageVersion const& r) const { return version == r.version && sub == r.sub; }
|
|
|
|
std::string toString() const { return format("%lld.%d", version, sub); }
|
|
|
|
LogMessageVersion(Version version, uint32_t sub) : version(version), sub(sub) {}
|
|
explicit LogMessageVersion(Version version) : version(version), sub(0) {}
|
|
LogMessageVersion() : version(0), sub(0) {}
|
|
bool empty() const { return (version == 0) && (sub == 0); }
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, version, sub);
|
|
}
|
|
};
|
|
|
|
struct AddressExclusion {
|
|
IPAddress ip;
|
|
int port;
|
|
|
|
AddressExclusion() : ip(0), port(0) {}
|
|
explicit AddressExclusion(const IPAddress& ip) : ip(ip), port(0) {}
|
|
explicit AddressExclusion(const IPAddress& ip, int port) : ip(ip), port(port) {}
|
|
|
|
bool operator<(AddressExclusion const& r) const {
|
|
if (ip != r.ip)
|
|
return ip < r.ip;
|
|
return port < r.port;
|
|
}
|
|
bool operator==(AddressExclusion const& r) const { return ip == r.ip && port == r.port; }
|
|
|
|
bool isWholeMachine() const { return port == 0; }
|
|
bool isValid() const { return ip.isValid() || port != 0; }
|
|
|
|
bool excludes(NetworkAddress const& addr) const {
|
|
if (isWholeMachine())
|
|
return ip == addr.ip;
|
|
return ip == addr.ip && port == addr.port;
|
|
}
|
|
|
|
// This is for debugging and IS NOT to be used for serialization to persistant state
|
|
std::string toString() const {
|
|
if (!isWholeMachine())
|
|
return formatIpPort(ip, port);
|
|
return ip.toString();
|
|
}
|
|
|
|
static AddressExclusion parse(StringRef const&);
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, ip, port);
|
|
}
|
|
};
|
|
|
|
inline bool addressExcluded(std::set<AddressExclusion> const& exclusions, NetworkAddress const& addr) {
|
|
return exclusions.count(AddressExclusion(addr.ip, addr.port)) || exclusions.count(AddressExclusion(addr.ip));
|
|
}
|
|
|
|
struct ClusterControllerPriorityInfo {
|
|
enum DCFitness {
|
|
FitnessPrimary,
|
|
FitnessRemote,
|
|
FitnessPreferred,
|
|
FitnessUnknown,
|
|
FitnessNotPreferred,
|
|
FitnessBad
|
|
}; // cannot be larger than 7 because of leader election mask
|
|
|
|
static DCFitness calculateDCFitness(Optional<Key> const& dcId, std::vector<Optional<Key>> const& dcPriority) {
|
|
if (!dcPriority.size()) {
|
|
return FitnessUnknown;
|
|
} else if (dcPriority.size() == 1) {
|
|
if (dcId == dcPriority[0]) {
|
|
return FitnessPreferred;
|
|
} else {
|
|
return FitnessNotPreferred;
|
|
}
|
|
} else {
|
|
if (dcId == dcPriority[0]) {
|
|
return FitnessPrimary;
|
|
} else if (dcId == dcPriority[1]) {
|
|
return FitnessRemote;
|
|
} else {
|
|
return FitnessBad;
|
|
}
|
|
}
|
|
}
|
|
|
|
uint8_t processClassFitness;
|
|
bool isExcluded;
|
|
uint8_t dcFitness;
|
|
|
|
bool operator==(ClusterControllerPriorityInfo const& r) const {
|
|
return processClassFitness == r.processClassFitness && isExcluded == r.isExcluded && dcFitness == r.dcFitness;
|
|
}
|
|
bool operator!=(ClusterControllerPriorityInfo const& r) const { return !(*this == r); }
|
|
ClusterControllerPriorityInfo()
|
|
: ClusterControllerPriorityInfo(/*ProcessClass::UnsetFit*/ 2,
|
|
false,
|
|
ClusterControllerPriorityInfo::FitnessUnknown) {}
|
|
ClusterControllerPriorityInfo(uint8_t processClassFitness, bool isExcluded, uint8_t dcFitness)
|
|
: processClassFitness(processClassFitness), isExcluded(isExcluded), dcFitness(dcFitness) {}
|
|
|
|
// To change this serialization, ProtocolVersion::ClusterControllerPriorityInfo must be updated, and downgrades need
|
|
// to be considered
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, processClassFitness, isExcluded, dcFitness);
|
|
}
|
|
};
|
|
|
|
class Database;
|
|
|
|
struct HealthMetrics {
|
|
struct StorageStats {
|
|
int64_t storageQueue;
|
|
int64_t storageDurabilityLag;
|
|
double diskUsage;
|
|
double cpuUsage;
|
|
|
|
bool operator==(StorageStats const& r) const {
|
|
return ((storageQueue == r.storageQueue) && (storageDurabilityLag == r.storageDurabilityLag) &&
|
|
(diskUsage == r.diskUsage) && (cpuUsage == r.cpuUsage));
|
|
}
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, storageQueue, storageDurabilityLag, diskUsage, cpuUsage);
|
|
}
|
|
};
|
|
|
|
int64_t worstStorageQueue;
|
|
int64_t limitingStorageQueue;
|
|
int64_t worstStorageDurabilityLag;
|
|
int64_t limitingStorageDurabilityLag;
|
|
int64_t worstTLogQueue;
|
|
double tpsLimit;
|
|
bool batchLimited;
|
|
std::map<UID, StorageStats> storageStats;
|
|
std::map<UID, int64_t> tLogQueue;
|
|
|
|
HealthMetrics()
|
|
: worstStorageQueue(0), limitingStorageQueue(0), worstStorageDurabilityLag(0), limitingStorageDurabilityLag(0),
|
|
worstTLogQueue(0), tpsLimit(0.0), batchLimited(false) {}
|
|
|
|
void update(const HealthMetrics& hm, bool detailedInput, bool detailedOutput) {
|
|
worstStorageQueue = hm.worstStorageQueue;
|
|
limitingStorageQueue = hm.limitingStorageQueue;
|
|
worstStorageDurabilityLag = hm.worstStorageDurabilityLag;
|
|
limitingStorageDurabilityLag = hm.limitingStorageDurabilityLag;
|
|
worstTLogQueue = hm.worstTLogQueue;
|
|
tpsLimit = hm.tpsLimit;
|
|
batchLimited = hm.batchLimited;
|
|
|
|
if (!detailedOutput) {
|
|
storageStats.clear();
|
|
tLogQueue.clear();
|
|
} else if (detailedInput) {
|
|
storageStats = hm.storageStats;
|
|
tLogQueue = hm.tLogQueue;
|
|
}
|
|
}
|
|
|
|
bool operator==(HealthMetrics const& r) const {
|
|
return (worstStorageQueue == r.worstStorageQueue && limitingStorageQueue == r.limitingStorageQueue &&
|
|
worstStorageDurabilityLag == r.worstStorageDurabilityLag &&
|
|
limitingStorageDurabilityLag == r.limitingStorageDurabilityLag && worstTLogQueue == r.worstTLogQueue &&
|
|
storageStats == r.storageStats && tLogQueue == r.tLogQueue && batchLimited == r.batchLimited);
|
|
}
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar,
|
|
worstStorageQueue,
|
|
worstStorageDurabilityLag,
|
|
worstTLogQueue,
|
|
tpsLimit,
|
|
batchLimited,
|
|
storageStats,
|
|
tLogQueue,
|
|
limitingStorageQueue,
|
|
limitingStorageDurabilityLag);
|
|
}
|
|
};
|
|
|
|
struct DDMetricsRef {
|
|
int64_t shardBytes;
|
|
KeyRef beginKey;
|
|
|
|
DDMetricsRef() : shardBytes(0) {}
|
|
DDMetricsRef(int64_t bytes, KeyRef begin) : shardBytes(bytes), beginKey(begin) {}
|
|
DDMetricsRef(Arena& a, const DDMetricsRef& copyFrom)
|
|
: shardBytes(copyFrom.shardBytes), beginKey(a, copyFrom.beginKey) {}
|
|
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, shardBytes, beginKey);
|
|
}
|
|
};
|
|
|
|
struct WorkerBackupStatus {
|
|
LogEpoch epoch;
|
|
Version version;
|
|
Tag tag;
|
|
int32_t totalTags;
|
|
|
|
WorkerBackupStatus() : epoch(0), version(invalidVersion) {}
|
|
WorkerBackupStatus(LogEpoch e, Version v, Tag t, int32_t total) : epoch(e), version(v), tag(t), totalTags(total) {}
|
|
|
|
// To change this serialization, ProtocolVersion::BackupProgressValue must be updated, and downgrades need to be
|
|
// considered
|
|
template <class Ar>
|
|
void serialize(Ar& ar) {
|
|
serializer(ar, epoch, version, tag, totalTags);
|
|
}
|
|
};
|
|
|
|
enum class TransactionPriority : uint8_t { BATCH, DEFAULT, IMMEDIATE, MIN = BATCH, MAX = IMMEDIATE };
|
|
|
|
const std::array<TransactionPriority, (int)TransactionPriority::MAX + 1> allTransactionPriorities = {
|
|
TransactionPriority::BATCH,
|
|
TransactionPriority::DEFAULT,
|
|
TransactionPriority::IMMEDIATE
|
|
};
|
|
|
|
inline const char* transactionPriorityToString(TransactionPriority priority, bool capitalize = true) {
|
|
switch (priority) {
|
|
case TransactionPriority::BATCH:
|
|
return capitalize ? "Batch" : "batch";
|
|
case TransactionPriority::DEFAULT:
|
|
return capitalize ? "Default" : "default";
|
|
case TransactionPriority::IMMEDIATE:
|
|
return capitalize ? "Immediate" : "immediate";
|
|
}
|
|
|
|
ASSERT(false);
|
|
throw internal_error();
|
|
}
|
|
|
|
#endif
|