Merge branch 'main' of github.com:apple/foundationdb into monitorusage
This commit is contained in:
commit
959bf9f4e7
|
@ -82,7 +82,8 @@ extern "C" DLLEXPORT fdb_bool_t fdb_error_predicate(int predicate_test, fdb_erro
|
|||
code == error_code_grv_proxy_memory_limit_exceeded ||
|
||||
code == error_code_commit_proxy_memory_limit_exceeded ||
|
||||
code == error_code_batch_transaction_throttled || code == error_code_process_behind ||
|
||||
code == error_code_tag_throttled || code == error_code_unknown_tenant;
|
||||
code == error_code_tag_throttled || code == error_code_unknown_tenant ||
|
||||
code == error_code_proxy_tag_throttled;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -70,10 +70,13 @@ void ApiWorkload::start() {
|
|||
schedule([this]() {
|
||||
// 1. Clear data
|
||||
clearData([this]() {
|
||||
// 2. Populate initial data
|
||||
populateData([this]() {
|
||||
// 3. Generate random workload
|
||||
runTests();
|
||||
// 2. Workload setup
|
||||
setup([this]() {
|
||||
// 3. Populate initial data
|
||||
populateData([this]() {
|
||||
// 4. Generate random workload
|
||||
runTests();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
@ -249,6 +252,10 @@ void ApiWorkload::populateData(TTaskFct cont) {
|
|||
}
|
||||
}
|
||||
|
||||
// Default workload-specific setup phase.
// Nothing is prepared here: the continuation is simply handed to the scheduler.
void ApiWorkload::setup(TTaskFct cont) {
	schedule(cont);
}
|
||||
|
||||
void ApiWorkload::randomInsertOp(TTaskFct cont, std::optional<int> tenantId) {
|
||||
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
|
||||
auto kvPairs = std::make_shared<std::vector<fdb::KeyValue>>();
|
||||
|
@ -322,4 +329,85 @@ std::optional<fdb::BytesRef> ApiWorkload::getTenant(std::optional<int> tenantId)
|
|||
}
|
||||
}
|
||||
|
||||
std::string ApiWorkload::debugTenantStr(std::optional<int> tenantId) {
|
||||
return tenantId.has_value() ? fmt::format("(tenant {0})", tenantId.value()) : "()";
|
||||
}
|
||||
|
||||
// BlobGranule setup.
|
||||
// This blobbifies ['\x00', '\xff') per tenant or for the whole database if there are no tenants.
|
||||
void ApiWorkload::setupBlobGranules(TTaskFct cont) {
|
||||
// This count is used to synchronize the # of tenant blobbifyRange() calls to ensure
|
||||
// we only start the workload once blobbification has fully finished.
|
||||
auto blobbifiedCount = std::make_shared<std::atomic<int>>(1);
|
||||
|
||||
if (tenants.empty()) {
|
||||
blobbifiedCount->store(1);
|
||||
blobbifyTenant({}, blobbifiedCount, cont);
|
||||
} else {
|
||||
blobbifiedCount->store(tenants.size());
|
||||
for (int i = 0; i < tenants.size(); i++) {
|
||||
schedule([=]() { blobbifyTenant(i, blobbifiedCount, cont); });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ApiWorkload::blobbifyTenant(std::optional<int> tenantId,
|
||||
std::shared_ptr<std::atomic<int>> blobbifiedCount,
|
||||
TTaskFct cont) {
|
||||
auto retBlobbifyRange = std::make_shared<bool>(false);
|
||||
execOperation(
|
||||
[=](auto ctx) {
|
||||
fdb::Key begin(1, '\x00');
|
||||
fdb::Key end(1, '\xff');
|
||||
|
||||
info(fmt::format("setup: blobbifying {}: [\\x00 - \\xff)\n", debugTenantStr(tenantId)));
|
||||
|
||||
fdb::Future f = ctx->dbOps()->blobbifyRange(begin, end).eraseType();
|
||||
ctx->continueAfter(f, [ctx, retBlobbifyRange, f]() {
|
||||
*retBlobbifyRange = f.get<fdb::future_var::Bool>();
|
||||
ctx->done();
|
||||
});
|
||||
},
|
||||
[=]() {
|
||||
if (!*retBlobbifyRange) {
|
||||
schedule([=]() { blobbifyTenant(tenantId, blobbifiedCount, cont); });
|
||||
} else {
|
||||
schedule([=]() { verifyTenant(tenantId, blobbifiedCount, cont); });
|
||||
}
|
||||
},
|
||||
/*tenant=*/getTenant(tenantId),
|
||||
/* failOnError = */ false);
|
||||
}
|
||||
|
||||
void ApiWorkload::verifyTenant(std::optional<int> tenantId,
|
||||
std::shared_ptr<std::atomic<int>> blobbifiedCount,
|
||||
TTaskFct cont) {
|
||||
auto retVerifyVersion = std::make_shared<int64_t>(-1);
|
||||
|
||||
execOperation(
|
||||
[=](auto ctx) {
|
||||
fdb::Key begin(1, '\x00');
|
||||
fdb::Key end(1, '\xff');
|
||||
|
||||
info(fmt::format("setup: verifying {}: [\\x00 - \\xff)\n", debugTenantStr(tenantId)));
|
||||
|
||||
fdb::Future f = ctx->dbOps()->verifyBlobRange(begin, end, /*latest_version*/ -2).eraseType();
|
||||
ctx->continueAfter(f, [ctx, retVerifyVersion, f]() {
|
||||
*retVerifyVersion = f.get<fdb::future_var::Int64>();
|
||||
ctx->done();
|
||||
});
|
||||
},
|
||||
[=]() {
|
||||
if (*retVerifyVersion == -1) {
|
||||
schedule([=]() { verifyTenant(tenantId, blobbifiedCount, cont); });
|
||||
} else {
|
||||
if (blobbifiedCount->fetch_sub(1) == 1) {
|
||||
schedule(cont);
|
||||
}
|
||||
}
|
||||
},
|
||||
/*tenant=*/getTenant(tenantId),
|
||||
/* failOnError = */ false);
|
||||
}
|
||||
|
||||
} // namespace FdbApiTester
|
||||
|
|
|
@ -41,6 +41,9 @@ public:
|
|||
|
||||
virtual void checkProgress() override;
|
||||
|
||||
// Workload specific setup phase.
|
||||
virtual void setup(TTaskFct cont);
|
||||
|
||||
// Running specific tests
|
||||
// The default implementation generates a workload consisting of
|
||||
// random operations generated by randomOperation
|
||||
|
@ -126,6 +129,12 @@ protected:
|
|||
void randomClearRangeOp(TTaskFct cont, std::optional<int> tenantId);
|
||||
|
||||
std::optional<fdb::BytesRef> getTenant(std::optional<int> tenantId);
|
||||
std::string debugTenantStr(std::optional<int> tenantId);
|
||||
|
||||
// Generic BlobGranules setup.
|
||||
void setupBlobGranules(TTaskFct cont);
|
||||
void blobbifyTenant(std::optional<int> tenantId, std::shared_ptr<std::atomic<int>> blobbifiedCount, TTaskFct cont);
|
||||
void verifyTenant(std::optional<int> tenantId, std::shared_ptr<std::atomic<int>> blobbifiedCount, TTaskFct cont);
|
||||
|
||||
private:
|
||||
void populateDataTx(TTaskFct cont, std::optional<int> tenantId);
|
||||
|
|
|
@ -52,26 +52,23 @@ private:
|
|||
};
|
||||
std::vector<OpType> excludedOpTypes;
|
||||
|
||||
void setup(TTaskFct cont) override { setupBlobGranules(cont); }
|
||||
|
||||
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
|
||||
// FIXME: should still guarantee a read succeeds eventually somehow
|
||||
// FIXME: this needs to be per tenant if tenant ids are set
|
||||
std::unordered_set<std::optional<int>> tenantsWithReadSuccess;
|
||||
|
||||
inline void setReadSuccess(std::optional<int> tenantId) { tenantsWithReadSuccess.insert(tenantId); }
|
||||
|
||||
inline bool seenReadSuccess(std::optional<int> tenantId) { return tenantsWithReadSuccess.count(tenantId); }
|
||||
|
||||
std::string tenantDebugString(std::optional<int> tenantId) {
|
||||
return tenantId.has_value() ? fmt::format(" (tenant {0})", tenantId.value()) : "";
|
||||
}
|
||||
|
||||
void debugOp(std::string opName, fdb::Key begin, fdb::Key end, std::optional<int> tenantId, std::string message) {
|
||||
if (BG_API_DEBUG_VERBOSE) {
|
||||
info(fmt::format("{0}: [{1} - {2}){3}: {4}",
|
||||
info(fmt::format("{0}: [{1} - {2}) {3}: {4}",
|
||||
opName,
|
||||
fdb::toCharsRef(begin),
|
||||
fdb::toCharsRef(end),
|
||||
tenantDebugString(tenantId),
|
||||
debugTenantStr(tenantId),
|
||||
message));
|
||||
}
|
||||
}
|
||||
|
@ -117,7 +114,7 @@ private:
|
|||
results.get()->assign(resVector.begin(), resVector.end());
|
||||
bool previousSuccess = seenReadSuccess(tenantId);
|
||||
if (!previousSuccess) {
|
||||
info(fmt::format("Read{0}: first success\n", tenantDebugString(tenantId)));
|
||||
info(fmt::format("Read {0}: first success\n", debugTenantStr(tenantId)));
|
||||
setReadSuccess(tenantId);
|
||||
} else {
|
||||
debugOp("Read", begin, end, tenantId, "complete");
|
||||
|
@ -289,20 +286,19 @@ private:
|
|||
}
|
||||
|
||||
// TODO: tenant support
|
||||
void randomGetBlobRangesOp(TTaskFct cont) {
|
||||
void randomGetBlobRangesOp(TTaskFct cont, std::optional<int> tenantId) {
|
||||
fdb::Key begin = randomKeyName();
|
||||
fdb::Key end = randomKeyName();
|
||||
auto results = std::make_shared<std::vector<fdb::KeyRange>>();
|
||||
if (begin > end) {
|
||||
std::swap(begin, end);
|
||||
}
|
||||
std::optional<int> tenantId = {};
|
||||
|
||||
debugOp("GetBlobRanges", begin, end, tenantId, "starting");
|
||||
|
||||
execOperation(
|
||||
[begin, end, results](auto ctx) {
|
||||
fdb::Future f = ctx->db().listBlobbifiedRanges(begin, end, 1000).eraseType();
|
||||
fdb::Future f = ctx->dbOps()->listBlobbifiedRanges(begin, end, 1000).eraseType();
|
||||
ctx->continueAfter(f, [ctx, f, results]() {
|
||||
*results = copyKeyRangeArray(f.get<fdb::future_var::KeyRangeRefArray>());
|
||||
ctx->done();
|
||||
|
@ -314,25 +310,24 @@ private:
|
|||
this->validateRanges(results, begin, end, seenReadSuccess(tenantId));
|
||||
schedule(cont);
|
||||
},
|
||||
getTenant(tenantId),
|
||||
/* failOnError = */ false);
|
||||
}
|
||||
|
||||
// TODO: tenant support
|
||||
void randomVerifyOp(TTaskFct cont) {
|
||||
void randomVerifyOp(TTaskFct cont, std::optional<int> tenantId) {
|
||||
fdb::Key begin = randomKeyName();
|
||||
fdb::Key end = randomKeyName();
|
||||
std::optional<int> tenantId;
|
||||
if (begin > end) {
|
||||
std::swap(begin, end);
|
||||
}
|
||||
|
||||
auto verifyVersion = std::make_shared<int64_t>(false);
|
||||
|
||||
debugOp("Verify", begin, end, tenantId, "starting");
|
||||
|
||||
auto verifyVersion = std::make_shared<int64_t>(-1);
|
||||
execOperation(
|
||||
[begin, end, verifyVersion](auto ctx) {
|
||||
fdb::Future f = ctx->db().verifyBlobRange(begin, end, -2 /* latest version*/).eraseType();
|
||||
fdb::Future f = ctx->dbOps()->verifyBlobRange(begin, end, -2 /* latest version*/).eraseType();
|
||||
ctx->continueAfter(f, [ctx, verifyVersion, f]() {
|
||||
*verifyVersion = f.get<fdb::future_var::Int64>();
|
||||
ctx->done();
|
||||
|
@ -344,15 +339,16 @@ private:
|
|||
if (*verifyVersion == -1) {
|
||||
ASSERT(!previousSuccess);
|
||||
} else if (!previousSuccess) {
|
||||
info(fmt::format("Verify{0}: first success\n", tenantDebugString(tenantId)));
|
||||
info(fmt::format("Verify {0}: first success\n", debugTenantStr(tenantId)));
|
||||
setReadSuccess(tenantId);
|
||||
}
|
||||
schedule(cont);
|
||||
},
|
||||
getTenant(tenantId),
|
||||
/* failOnError = */ false);
|
||||
}
|
||||
|
||||
void randomOperation(TTaskFct cont) {
|
||||
void randomOperation(TTaskFct cont) override {
|
||||
std::optional<int> tenantId = randomTenant();
|
||||
|
||||
OpType txType = (stores[tenantId].size() == 0) ? OP_INSERT : (OpType)Random::get().randomInt(0, OP_LAST);
|
||||
|
@ -380,10 +376,10 @@ private:
|
|||
randomSummarizeOp(cont, tenantId);
|
||||
break;
|
||||
case OP_GET_BLOB_RANGES:
|
||||
randomGetBlobRangesOp(cont);
|
||||
randomGetBlobRangesOp(cont, tenantId);
|
||||
break;
|
||||
case OP_VERIFY:
|
||||
randomVerifyOp(cont);
|
||||
randomVerifyOp(cont, tenantId);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,8 @@ private:
|
|||
OP_LAST = OP_CANCEL_PURGE
|
||||
};
|
||||
|
||||
// Workload setup phase: delegates to the generic blob granule setup.
void setup(TTaskFct cont) override {
	setupBlobGranules(cont);
}
|
||||
|
||||
// could add summarize too old and verify too old as ops if desired but those are lower value
|
||||
|
||||
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
|
||||
|
|
|
@ -91,13 +91,15 @@ public:
|
|||
fdbDb = executor->selectDatabase();
|
||||
}
|
||||
|
||||
if (tenantName) {
|
||||
fdbTenant = fdbDb.openTenant(*tenantName);
|
||||
fdbDbOps = std::make_shared<fdb::Tenant>(fdbTenant);
|
||||
} else {
|
||||
fdbDbOps = std::make_shared<fdb::Database>(fdbDb);
|
||||
}
|
||||
|
||||
if (transactional) {
|
||||
if (tenantName) {
|
||||
fdb::Tenant tenant = fdbDb.openTenant(*tenantName);
|
||||
fdbTx = tenant.createTransaction();
|
||||
} else {
|
||||
fdbTx = fdbDb.createTransaction();
|
||||
}
|
||||
fdbTx = fdbDbOps->createTransaction();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -109,6 +111,10 @@ public:
|
|||
|
||||
fdb::Database db() override { return fdbDb.atomic_load(); }
|
||||
|
||||
// Current FDB tenant, read through fdbTenant.atomic_load().
fdb::Tenant tenant() override {
	return fdbTenant.atomic_load();
}
|
||||
|
||||
// Current IDatabaseOps pointer, read with std::atomic_load on the shared_ptr member.
std::shared_ptr<fdb::IDatabaseOps> dbOps() override {
	return std::atomic_load(&fdbDbOps);
}
|
||||
|
||||
fdb::Transaction tx() override { return fdbTx.atomic_load(); }
|
||||
|
||||
// Set a continuation to be executed when a future gets ready
|
||||
|
@ -272,13 +278,17 @@ protected:
|
|||
scheduler->schedule([thisRef]() {
|
||||
fdb::Database db = thisRef->executor->selectDatabase();
|
||||
thisRef->fdbDb.atomic_store(db);
|
||||
if (thisRef->tenantName) {
|
||||
fdb::Tenant tenant = db.openTenant(*thisRef->tenantName);
|
||||
thisRef->fdbTenant.atomic_store(tenant);
|
||||
std::atomic_store(&thisRef->fdbDbOps,
|
||||
std::dynamic_pointer_cast<fdb::IDatabaseOps>(std::make_shared<fdb::Tenant>(tenant)));
|
||||
} else {
|
||||
std::atomic_store(&thisRef->fdbDbOps,
|
||||
std::dynamic_pointer_cast<fdb::IDatabaseOps>(std::make_shared<fdb::Database>(db)));
|
||||
}
|
||||
if (thisRef->transactional) {
|
||||
if (thisRef->tenantName) {
|
||||
fdb::Tenant tenant = db.openTenant(*thisRef->tenantName);
|
||||
thisRef->fdbTx.atomic_store(tenant.createTransaction());
|
||||
} else {
|
||||
thisRef->fdbTx.atomic_store(db.createTransaction());
|
||||
}
|
||||
thisRef->fdbTx.atomic_store(thisRef->fdbDbOps->createTransaction());
|
||||
}
|
||||
thisRef->restartTransaction();
|
||||
});
|
||||
|
@ -317,6 +327,14 @@ protected:
|
|||
// Provides a thread safe interface by itself (no need for mutex)
|
||||
fdb::Database fdbDb;
|
||||
|
||||
// FDB tenant
|
||||
// Provides a thread safe interface by itself (no need for mutex)
|
||||
fdb::Tenant fdbTenant;
|
||||
|
||||
// FDB IDatabaseOps to hide database/tenant accordingly.
|
||||
// Provides a shared pointer to database functions based on if db or tenant.
|
||||
std::shared_ptr<fdb::IDatabaseOps> fdbDbOps;
|
||||
|
||||
// FDB transaction
|
||||
// Provides a thread safe interface by itself (no need for mutex)
|
||||
fdb::Transaction fdbTx;
|
||||
|
|
|
@ -41,6 +41,12 @@ public:
|
|||
// Current FDB database
|
||||
virtual fdb::Database db() = 0;
|
||||
|
||||
// Current FDB tenant
|
||||
virtual fdb::Tenant tenant() = 0;
|
||||
|
||||
// Current FDB IDatabaseOps
|
||||
virtual std::shared_ptr<fdb::IDatabaseOps> dbOps() = 0;
|
||||
|
||||
// Current FDB transaction
|
||||
virtual fdb::Transaction tx() = 0;
|
||||
|
||||
|
|
|
@ -117,8 +117,11 @@ void WorkloadBase::execTransaction(TOpStartFct startFct,
|
|||
}
|
||||
|
||||
// Execute a non-transactional database operation within the workload
|
||||
void WorkloadBase::execOperation(TOpStartFct startFct, TTaskFct cont, bool failOnError) {
|
||||
doExecute(startFct, cont, {}, failOnError, false);
|
||||
void WorkloadBase::execOperation(TOpStartFct startFct,
|
||||
TTaskFct cont,
|
||||
std::optional<fdb::BytesRef> tenant,
|
||||
bool failOnError) {
|
||||
doExecute(startFct, cont, tenant, failOnError, false);
|
||||
}
|
||||
|
||||
void WorkloadBase::doExecute(TOpStartFct startFct,
|
||||
|
|
|
@ -125,7 +125,10 @@ protected:
|
|||
bool failOnError = true);
|
||||
|
||||
// Execute a non-transactional database operation within the workload
|
||||
void execOperation(TOpStartFct startFct, TTaskFct cont, bool failOnError = true);
|
||||
void execOperation(TOpStartFct startFct,
|
||||
TTaskFct cont,
|
||||
std::optional<fdb::BytesRef> tenant = std::optional<fdb::BytesRef>(),
|
||||
bool failOnError = true);
|
||||
|
||||
// Log an error message, increase error counter
|
||||
void error(const std::string& msg);
|
||||
|
|
|
@ -677,7 +677,28 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class Tenant final {
|
||||
// Handle this as an abstract class instead of interface to preserve lifetime of fdb objects owned by Tenant and
|
||||
// Database.
|
||||
class IDatabaseOps {
|
||||
public:
|
||||
virtual ~IDatabaseOps() = default;
|
||||
|
||||
virtual Transaction createTransaction() = 0;
|
||||
|
||||
virtual TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) = 0;
|
||||
virtual TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) = 0;
|
||||
virtual TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin,
|
||||
KeyRef end,
|
||||
int rangeLimit) = 0;
|
||||
virtual TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) = 0;
|
||||
virtual TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin,
|
||||
KeyRef end,
|
||||
int64_t version,
|
||||
bool force) = 0;
|
||||
virtual TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) = 0;
|
||||
};
|
||||
|
||||
class Tenant final : public IDatabaseOps {
|
||||
friend class Database;
|
||||
std::shared_ptr<native::FDBTenant> tenant;
|
||||
|
||||
|
@ -694,6 +715,14 @@ public:
|
|||
Tenant& operator=(const Tenant&) noexcept = default;
|
||||
Tenant() noexcept : tenant(nullptr) {}
|
||||
|
||||
// Replaces the wrapped tenant pointer with the one held by `other`,
// using std::atomic_store on the shared_ptr member.
void atomic_store(Tenant other) {
	std::atomic_store(&tenant, other.tenant);
}
|
||||
|
||||
// Returns a Tenant whose pointer was read with std::atomic_load from this object's
// shared_ptr member.
Tenant atomic_load() {
	Tenant snapshot;
	snapshot.tenant = std::atomic_load(&tenant);
	return snapshot;
}
|
||||
|
||||
static void createTenant(Transaction tr, BytesRef name) {
|
||||
tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef());
|
||||
tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef());
|
||||
|
@ -715,7 +744,7 @@ public:
|
|||
return tr.get(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), false);
|
||||
}
|
||||
|
||||
Transaction createTransaction() {
|
||||
Transaction createTransaction() override {
|
||||
auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
|
||||
auto err = Error(native::fdb_tenant_create_transaction(tenant.get(), &tx_native));
|
||||
if (err)
|
||||
|
@ -723,14 +752,49 @@ public:
|
|||
return Transaction(tx_native);
|
||||
}
|
||||
|
||||
TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) {
|
||||
TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) override {
|
||||
if (!tenant)
|
||||
throw std::runtime_error("blobbifyRange from null tenant");
|
||||
throw std::runtime_error("blobbifyRange() from null tenant");
|
||||
return native::fdb_tenant_blobbify_range(tenant.get(), begin.data(), intSize(begin), end.data(), intSize(end));
|
||||
}
|
||||
|
||||
// Forwards to native::fdb_tenant_unblobbify_range for the range [begin, end).
// Throws std::runtime_error when this object holds no tenant.
TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) override {
	if (tenant == nullptr) {
		throw std::runtime_error("unblobbifyRange() from null tenant");
	}
	auto* handle = tenant.get();
	return native::fdb_tenant_unblobbify_range(handle, begin.data(), intSize(begin), end.data(), intSize(end));
}
|
||||
|
||||
// Forwards to native::fdb_tenant_list_blobbified_ranges for [begin, end) with the given
// range limit. Throws std::runtime_error when this object holds no tenant.
TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin, KeyRef end, int rangeLimit) override {
	if (tenant == nullptr) {
		throw std::runtime_error("listBlobbifiedRanges() from null tenant");
	}
	auto* handle = tenant.get();
	return native::fdb_tenant_list_blobbified_ranges(
	    handle, begin.data(), intSize(begin), end.data(), intSize(end), rangeLimit);
}
|
||||
|
||||
// Forwards to native::fdb_tenant_verify_blob_range for [begin, end) at `version`.
// Throws std::runtime_error when this object holds no tenant.
TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) override {
	if (tenant == nullptr) {
		throw std::runtime_error("verifyBlobRange() from null tenant");
	}
	auto* handle = tenant.get();
	return native::fdb_tenant_verify_blob_range(
	    handle, begin.data(), intSize(begin), end.data(), intSize(end), version);
}
|
||||
|
||||
// Forwards to native::fdb_tenant_purge_blob_granules for [begin, end) at `version`.
// `force` is converted to the native fdb_bool_t before the call.
// Throws std::runtime_error when this object holds no tenant.
TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin, KeyRef end, int64_t version, bool force) override {
	if (tenant == nullptr) {
		throw std::runtime_error("purgeBlobGranules() from null tenant");
	}
	native::fdb_bool_t nativeForce = force;
	auto* handle = tenant.get();
	return native::fdb_tenant_purge_blob_granules(
	    handle, begin.data(), intSize(begin), end.data(), intSize(end), version, nativeForce);
}
|
||||
|
||||
// Forwards to native::fdb_tenant_wait_purge_granules_complete for the given purge key.
// Throws std::runtime_error when this object holds no tenant.
TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) override {
	if (tenant == nullptr) {
		throw std::runtime_error("waitPurgeGranulesComplete() from null tenant");
	}
	auto* handle = tenant.get();
	return native::fdb_tenant_wait_purge_granules_complete(handle, purgeKey.data(), intSize(purgeKey));
}
|
||||
};
|
||||
|
||||
class Database {
|
||||
class Database : public IDatabaseOps {
|
||||
friend class Tenant;
|
||||
std::shared_ptr<native::FDBDatabase> db;
|
||||
|
||||
|
@ -789,7 +853,7 @@ public:
|
|||
return Tenant(tenant_native);
|
||||
}
|
||||
|
||||
Transaction createTransaction() {
|
||||
Transaction createTransaction() override {
|
||||
if (!db)
|
||||
throw std::runtime_error("create_transaction from null database");
|
||||
auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
|
||||
|
@ -799,33 +863,33 @@ public:
|
|||
return Transaction(tx_native);
|
||||
}
|
||||
|
||||
TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin, KeyRef end, int rangeLimit) {
|
||||
TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin, KeyRef end, int rangeLimit) override {
|
||||
if (!db)
|
||||
throw std::runtime_error("listBlobbifiedRanges from null database");
|
||||
return native::fdb_database_list_blobbified_ranges(
|
||||
db.get(), begin.data(), intSize(begin), end.data(), intSize(end), rangeLimit);
|
||||
}
|
||||
|
||||
TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) {
|
||||
TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) override {
|
||||
if (!db)
|
||||
throw std::runtime_error("verifyBlobRange from null database");
|
||||
return native::fdb_database_verify_blob_range(
|
||||
db.get(), begin.data(), intSize(begin), end.data(), intSize(end), version);
|
||||
}
|
||||
|
||||
TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) {
|
||||
TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) override {
|
||||
if (!db)
|
||||
throw std::runtime_error("blobbifyRange from null database");
|
||||
return native::fdb_database_blobbify_range(db.get(), begin.data(), intSize(begin), end.data(), intSize(end));
|
||||
}
|
||||
|
||||
TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) {
|
||||
TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) override {
|
||||
if (!db)
|
||||
throw std::runtime_error("unblobbifyRange from null database");
|
||||
return native::fdb_database_unblobbify_range(db.get(), begin.data(), intSize(begin), end.data(), intSize(end));
|
||||
}
|
||||
|
||||
TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin, KeyRef end, int64_t version, bool force) {
|
||||
TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin, KeyRef end, int64_t version, bool force) override {
|
||||
if (!db)
|
||||
throw std::runtime_error("purgeBlobGranules from null database");
|
||||
native::fdb_bool_t forceBool = force;
|
||||
|
@ -833,7 +897,7 @@ public:
|
|||
db.get(), begin.data(), intSize(begin), end.data(), intSize(end), version, forceBool);
|
||||
}
|
||||
|
||||
TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) {
|
||||
TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) override {
|
||||
if (!db)
|
||||
throw std::runtime_error("purgeBlobGranules from null database");
|
||||
return native::fdb_database_wait_purge_granules_complete(db.get(), purgeKey.data(), intSize(purgeKey));
|
||||
|
|
|
@ -497,6 +497,11 @@ func (o TransactionOptions) SetRawAccess() error {
|
|||
return o.setOpt(303, nil)
|
||||
}
|
||||
|
||||
// Allows this transaction to bypass storage quota enforcement. Should only be used for transactions that directly or indirectly decrease the size of the tenant group's data.
|
||||
func (o TransactionOptions) SetBypassStorageQuota() error {
|
||||
return o.setOpt(304, nil)
|
||||
}
|
||||
|
||||
// Not yet implemented.
|
||||
func (o TransactionOptions) SetDebugRetryLogging(param string) error {
|
||||
return o.setOpt(401, []byte(param))
|
||||
|
|
|
@ -54,7 +54,7 @@ def write_coverage_chunk(tr, path: Tuple[str, ...], metadata: Tuple[str, ...],
|
|||
initialized = v.present()
|
||||
for cov, covered in coverage:
|
||||
if not initialized or covered:
|
||||
tr.add(cov_dir.pack((cov.file, cov.line, cov.comment)), struct.pack('<I', 1 if covered else 0))
|
||||
tr.add(cov_dir.pack((cov.file, cov.line, cov.comment, cov.rare)), struct.pack('<I', 1 if covered else 0))
|
||||
return initialized
|
||||
|
||||
|
||||
|
@ -80,9 +80,9 @@ def _read_coverage(tr, cov_path: Tuple[str, ...]) -> OrderedDict[Coverage, int]:
|
|||
res = collections.OrderedDict()
|
||||
cov_dir = fdb.directory.create_or_open(tr, cov_path)
|
||||
for k, v in tr[cov_dir.range()]:
|
||||
file, line, comment = cov_dir.unpack(k)
|
||||
file, line, comment, rare = cov_dir.unpack(k)
|
||||
count = struct.unpack('<I', v)[0]
|
||||
res[Coverage(file, line, comment)] = count
|
||||
res[Coverage(file, line, comment, rare)] = count
|
||||
return res
|
||||
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ class GlobalStatistics:
|
|||
self.total_cpu_time: int = 0
|
||||
self.total_test_runs: int = 0
|
||||
self.total_missed_probes: int = 0
|
||||
self.total_missed_nonrare_probes: int = 0
|
||||
|
||||
|
||||
class EnsembleResults:
|
||||
|
@ -40,6 +41,8 @@ class EnsembleResults:
|
|||
self.coverage.append((cov, count))
|
||||
if count <= self.ratio:
|
||||
self.global_statistics.total_missed_probes += 1
|
||||
if not cov.rare:
|
||||
self.global_statistics.total_missed_nonrare_probes += 1
|
||||
if self.min_coverage_hit is None or self.min_coverage_hit > count:
|
||||
self.min_coverage_hit = count
|
||||
self.coverage.sort(key=lambda x: (x[1], x[0].file, x[0].line))
|
||||
|
@ -63,9 +66,12 @@ class EnsembleResults:
|
|||
out.attributes['MinProbeHit'] = str(self.min_coverage_hit)
|
||||
out.attributes['TotalProbes'] = str(len(self.coverage))
|
||||
out.attributes['MissedProbes'] = str(self.global_statistics.total_missed_probes)
|
||||
out.attributes['MissedNonRareProbes'] = str(self.global_statistics.total_missed_nonrare_probes)
|
||||
|
||||
for cov, count in self.coverage:
|
||||
severity = 10 if count > self.ratio else 40
|
||||
severity = 10
|
||||
if count <= self.ratio:
|
||||
severity = 30 if cov.rare else 40
|
||||
if severity == 40:
|
||||
errors += 1
|
||||
if (severity == 40 and errors <= config.max_errors) or config.details:
|
||||
|
@ -75,6 +81,7 @@ class EnsembleResults:
|
|||
child.attributes['Line'] = str(cov.line)
|
||||
child.attributes['Comment'] = '' if cov.comment is None else cov.comment
|
||||
child.attributes['HitCount'] = str(count)
|
||||
child.attributes['Rare'] = str(cov.rare)
|
||||
out.append(child)
|
||||
|
||||
if config.details:
|
||||
|
|
|
@ -193,16 +193,17 @@ class JsonParser(Parser):
|
|||
|
||||
|
||||
class Coverage:
|
||||
def __init__(self, file: str, line: str | int, comment: str | None = None):
|
||||
def __init__(self, file: str, line: str | int, comment: str | None = None, rare: bool = False):
|
||||
self.file = file
|
||||
self.line = int(line)
|
||||
self.comment = comment
|
||||
self.rare = rare
|
||||
|
||||
def to_tuple(self) -> Tuple[str, int, str | None]:
|
||||
return self.file, self.line, self.comment
|
||||
return self.file, self.line, self.comment, self.rare
|
||||
|
||||
def __eq__(self, other) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
if isinstance(other, tuple) and len(other) == 4:
|
||||
return self.to_tuple() == other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() == other.to_tuple()
|
||||
|
@ -210,7 +211,7 @@ class Coverage:
|
|||
return False
|
||||
|
||||
def __lt__(self, other) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
if isinstance(other, tuple) and len(other) == 4:
|
||||
return self.to_tuple() < other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() < other.to_tuple()
|
||||
|
@ -218,7 +219,7 @@ class Coverage:
|
|||
return False
|
||||
|
||||
def __le__(self, other) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
if isinstance(other, tuple) and len(other) == 4:
|
||||
return self.to_tuple() <= other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() <= other.to_tuple()
|
||||
|
@ -226,7 +227,7 @@ class Coverage:
|
|||
return False
|
||||
|
||||
def __gt__(self, other: Coverage) -> bool:
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
if isinstance(other, tuple) and len(other) == 4:
|
||||
return self.to_tuple() > other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() > other.to_tuple()
|
||||
|
@ -234,7 +235,7 @@ class Coverage:
|
|||
return False
|
||||
|
||||
def __ge__(self, other):
|
||||
if isinstance(other, tuple) and len(other) == 3:
|
||||
if isinstance(other, tuple) and len(other) == 4:
|
||||
return self.to_tuple() >= other
|
||||
elif isinstance(other, Coverage):
|
||||
return self.to_tuple() >= other.to_tuple()
|
||||
|
@ -242,7 +243,7 @@ class Coverage:
|
|||
return False
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.file, self.line, self.comment))
|
||||
return hash((self.file, self.line, self.comment, self.rare))
|
||||
|
||||
|
||||
class TraceFiles:
|
||||
|
@ -378,6 +379,7 @@ class Summary:
|
|||
child = SummaryTree('CodeCoverage')
|
||||
child.attributes['File'] = k.file
|
||||
child.attributes['Line'] = str(k.line)
|
||||
child.attributes['Rare'] = k.rare
|
||||
if not v:
|
||||
child.attributes['Covered'] = '0'
|
||||
if k.comment is not None and len(k.comment):
|
||||
|
@ -595,7 +597,10 @@ class Summary:
|
|||
comment = ''
|
||||
if 'Comment' in attrs:
|
||||
comment = attrs['Comment']
|
||||
c = Coverage(attrs['File'], attrs['Line'], comment)
|
||||
rare = False
|
||||
if 'Rare' in attrs:
|
||||
rare = bool(int(attrs['Rare']))
|
||||
c = Coverage(attrs['File'], attrs['Line'], comment, rare)
|
||||
if covered or c not in self.coverage:
|
||||
self.coverage[c] = covered
|
||||
|
||||
|
|
|
@ -116,12 +116,12 @@ If an individual zone is unhealthy, it may cause the throttling ratio for storag
|
|||
### Client Rate Calculation
|
||||
The smoothed per-client rate for each tag is tracked within `GlobalTagThrottlerImpl::PerTagStatistics`. Once a target rate has been computed, this is passed to `GlobalTagThrottlerImpl::PerTagStatistics::updateAndGetPerClientRate` which adjusts the per-client rate. The per-client rate is meant to limit the busiest clients, so that at equilibrium, the per-client rate will remain constant and the sum of throughput from all clients will match the target rate.
|
||||
|
||||
## Testing
|
||||
The `GlobalTagThrottling.toml` test provides a simple end-to-end test using the global tag throttler. Quotas are set using the internal tag quota API in the `GlobalTagThrottling` workload. This is run in parallel with the `ReadWrite` workload, which tags transactions. The number of `transaction_tag_throttled` errors is reported, along with the throughput, which should be roughly predictable based on the quota parameters chosen.
|
||||
## Simulation Testing
|
||||
The `ThroughputQuota.toml` test provides a simple end-to-end test using the global tag throttler. Quotas are set using the internal tag quota API in the `ThroughputQuota` workload. This is run with the `Cycle` workload, which randomly tags transactions.
|
||||
|
||||
In addition to this end-to-end test, there is a suite of unit tests with the `/GlobalTagThrottler/` prefix. These tests run in a mock environment, with mock storage servers providing simulated storage queue statistics and tag busyness reports. Mock clients simulate workload on these mock storage servers, and get throttling feedback directly from a global tag throttler which is monitoring the mock storage servers.
|
||||
|
||||
In each test, the `GlobalTagThrottlerTesting::monitor` function is used to periodically check whether or not a desired equilibrium state has been reached. If the desired state is reached and maintained for a sufficient period of time, the test passes. If the unit test is unable to reach this desired equilibrium state before a timeout, the test will fail. Commonly, the desired state is for the global tag throttler to report a client rate sufficiently close to the desired rate specified as an input to the `GlobalTagThrottlerTesting::rateIsNear` function.
|
||||
In each unit test, the `GlobalTagThrottlerTesting::monitor` function is used to periodically check whether or not a desired equilibrium state has been reached. If the desired state is reached and maintained for a sufficient period of time, the test passes. If the unit test is unable to reach this desired equilibrium state before a timeout, the test will fail. Commonly, the desired state is for the global tag throttler to report a client rate sufficiently close to the desired rate specified as an input to the `GlobalTagThrottlerTesting::rateIsNear` function.
|
||||
|
||||
## Visibility
|
||||
|
||||
|
|
|
@ -107,9 +107,9 @@ struct ConvertParams {
|
|||
bool log_enabled = false;
|
||||
std::string log_dir, trace_format, trace_log_group;
|
||||
|
||||
bool isValid() { return begin != invalidVersion && end != invalidVersion && !container_url.empty(); }
|
||||
bool isValid() const { return begin != invalidVersion && end != invalidVersion && !container_url.empty(); }
|
||||
|
||||
std::string toString() {
|
||||
std::string toString() const {
|
||||
std::string s;
|
||||
s.append("ContainerURL:");
|
||||
s.append(container_url);
|
||||
|
|
|
@ -19,11 +19,13 @@
|
|||
*/
|
||||
|
||||
#include "fdbcli/fdbcli.actor.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last include
|
||||
|
||||
namespace {
|
||||
|
||||
enum class LimitType { RESERVED, TOTAL };
|
||||
enum class QuotaType { RESERVED, TOTAL, STORAGE };
|
||||
|
||||
Optional<TransactionTag> parseTag(StringRef token) {
|
||||
if (token.size() > CLIENT_KNOBS->MAX_TRANSACTION_TAG_LENGTH) {
|
||||
|
@ -33,17 +35,19 @@ Optional<TransactionTag> parseTag(StringRef token) {
|
|||
}
|
||||
}
|
||||
|
||||
// Maps a command token to a quota type:
//   "reserved_throughput" -> RESERVED
//   "total_throughput"    -> TOTAL
//   "storage"             -> STORAGE
// Any other token yields an empty Optional.
// NOTE(review): the LimitType and QuotaType lines are both present here, which
// looks like the before/after lines of a rendered diff hunk -- confirm before
// treating this span as compilable source.
Optional<LimitType> parseLimitType(StringRef token) {
|
||||
Optional<QuotaType> parseQuotaType(StringRef token) {
|
||||
if (token == "reserved_throughput"_sr) {
|
||||
return LimitType::RESERVED;
|
||||
return QuotaType::RESERVED;
|
||||
} else if (token == "total_throughput"_sr) {
|
||||
return LimitType::TOTAL;
|
||||
return QuotaType::TOTAL;
|
||||
} else if (token == "storage"_sr) {
|
||||
return QuotaType::STORAGE;
|
||||
} else {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
Optional<int64_t> parseLimitValue(StringRef token) {
|
||||
Optional<int64_t> parseQuotaValue(StringRef token) {
|
||||
try {
|
||||
return std::stol(token.toString());
|
||||
} catch (...) {
|
||||
|
@ -51,20 +55,26 @@ Optional<int64_t> parseLimitValue(StringRef token) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> getQuota(Reference<IDatabase> db, TransactionTag tag, LimitType limitType) {
|
||||
ACTOR Future<Void> getQuota(Reference<IDatabase> db, TransactionTag tag, QuotaType quotaType) {
|
||||
state Reference<ITransaction> tr = db->createTransaction();
|
||||
loop {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
try {
|
||||
state ThreadFuture<Optional<Value>> resultFuture = tr->get(ThrottleApi::getTagQuotaKey(tag));
|
||||
state ThreadFuture<Optional<Value>> resultFuture =
|
||||
tr->get(quotaType == QuotaType::STORAGE ? storageQuotaKey(tag) : ThrottleApi::getTagQuotaKey(tag));
|
||||
Optional<Value> v = wait(safeThreadFutureToFuture(resultFuture));
|
||||
if (!v.present()) {
|
||||
fmt::print("<empty>\n");
|
||||
} else {
|
||||
if (quotaType == QuotaType::STORAGE) {
|
||||
int64_t storageQuota = BinaryReader::fromStringRef<int64_t>(v.get(), Unversioned());
|
||||
fmt::print("{}\n", storageQuota);
|
||||
return Void();
|
||||
}
|
||||
auto const quota = ThrottleApi::TagQuotaValue::fromValue(v.get());
|
||||
if (limitType == LimitType::TOTAL) {
|
||||
if (quotaType == QuotaType::TOTAL) {
|
||||
fmt::print("{}\n", quota.totalQuota);
|
||||
} else if (limitType == LimitType::RESERVED) {
|
||||
} else if (quotaType == QuotaType::RESERVED) {
|
||||
fmt::print("{}\n", quota.reservedQuota);
|
||||
}
|
||||
}
|
||||
|
@ -75,32 +85,36 @@ ACTOR Future<Void> getQuota(Reference<IDatabase> db, TransactionTag tag, LimitTy
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> setQuota(Reference<IDatabase> db, TransactionTag tag, LimitType limitType, int64_t value) {
|
||||
ACTOR Future<Void> setQuota(Reference<IDatabase> db, TransactionTag tag, QuotaType quotaType, int64_t value) {
|
||||
state Reference<ITransaction> tr = db->createTransaction();
|
||||
loop {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
state ThreadFuture<Optional<Value>> resultFuture = tr->get(ThrottleApi::getTagQuotaKey(tag));
|
||||
Optional<Value> v = wait(safeThreadFutureToFuture(resultFuture));
|
||||
ThrottleApi::TagQuotaValue quota;
|
||||
if (v.present()) {
|
||||
quota = ThrottleApi::TagQuotaValue::fromValue(v.get());
|
||||
if (quotaType == QuotaType::STORAGE) {
|
||||
tr->set(storageQuotaKey(tag), BinaryWriter::toValue<int64_t>(value, Unversioned()));
|
||||
} else {
|
||||
state ThreadFuture<Optional<Value>> resultFuture = tr->get(ThrottleApi::getTagQuotaKey(tag));
|
||||
Optional<Value> v = wait(safeThreadFutureToFuture(resultFuture));
|
||||
ThrottleApi::TagQuotaValue quota;
|
||||
if (v.present()) {
|
||||
quota = ThrottleApi::TagQuotaValue::fromValue(v.get());
|
||||
}
|
||||
// Internally, costs are stored in terms of pages, but in the API,
|
||||
// costs are specified in terms of bytes
|
||||
if (quotaType == QuotaType::TOTAL) {
|
||||
// Round up to nearest page size
|
||||
quota.totalQuota = ((value - 1) / CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE + 1) *
|
||||
CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE;
|
||||
} else if (quotaType == QuotaType::RESERVED) {
|
||||
// Round up to nearest page size
|
||||
quota.reservedQuota = ((value - 1) / CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE + 1) *
|
||||
CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE;
|
||||
}
|
||||
if (!quota.isValid()) {
|
||||
throw invalid_throttle_quota_value();
|
||||
}
|
||||
ThrottleApi::setTagQuota(tr, tag, quota.reservedQuota, quota.totalQuota);
|
||||
}
|
||||
// Internally, costs are stored in terms of pages, but in the API,
|
||||
// costs are specified in terms of bytes
|
||||
if (limitType == LimitType::TOTAL) {
|
||||
// Round up to nearest page size
|
||||
quota.totalQuota =
|
||||
((value - 1) / CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE + 1) * CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE;
|
||||
} else if (limitType == LimitType::RESERVED) {
|
||||
// Round up to nearest page size
|
||||
quota.reservedQuota =
|
||||
((value - 1) / CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE + 1) * CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE;
|
||||
}
|
||||
if (!quota.isValid()) {
|
||||
throw invalid_throttle_quota_value();
|
||||
}
|
||||
ThrottleApi::setTagQuota(tr, tag, quota.reservedQuota, quota.totalQuota);
|
||||
wait(safeThreadFutureToFuture(tr->commit()));
|
||||
fmt::print("Successfully updated quota.\n");
|
||||
return Void();
|
||||
|
@ -115,6 +129,7 @@ ACTOR Future<Void> clearQuota(Reference<IDatabase> db, TransactionTag tag) {
|
|||
loop {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
tr->clear(storageQuotaKey(tag));
|
||||
tr->clear(ThrottleApi::getTagQuotaKey(tag));
|
||||
wait(safeThreadFutureToFuture(tr->commit()));
|
||||
fmt::print("Successfully cleared quota.\n");
|
||||
|
@ -125,8 +140,8 @@ ACTOR Future<Void> clearQuota(Reference<IDatabase> db, TransactionTag tag) {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr auto usage = "quota [get <tag> [reserved_throughput|total_throughput] | set <tag> "
|
||||
"[reserved_throughput|total_throughput] <value> | clear <tag>]";
|
||||
constexpr auto usage = "quota [get <tag> [reserved_throughput|total_throughput|storage] | set <tag> "
|
||||
"[reserved_throughput|total_throughput|storage] <value> | clear <tag>]";
|
||||
|
||||
bool exitFailure() {
|
||||
fmt::print(usage);
|
||||
|
@ -150,22 +165,22 @@ ACTOR Future<bool> quotaCommandActor(Reference<IDatabase> db, std::vector<String
|
|||
if (tokens.size() != 4) {
|
||||
return exitFailure();
|
||||
}
|
||||
auto const limitType = parseLimitType(tokens[3]);
|
||||
if (!limitType.present()) {
|
||||
auto const quotaType = parseQuotaType(tokens[3]);
|
||||
if (!quotaType.present()) {
|
||||
return exitFailure();
|
||||
}
|
||||
wait(getQuota(db, tag.get(), limitType.get()));
|
||||
wait(getQuota(db, tag.get(), quotaType.get()));
|
||||
return true;
|
||||
} else if (tokens[1] == "set"_sr) {
|
||||
if (tokens.size() != 5) {
|
||||
return exitFailure();
|
||||
}
|
||||
auto const limitType = parseLimitType(tokens[3]);
|
||||
auto const limitValue = parseLimitValue(tokens[4]);
|
||||
if (!limitType.present() || !limitValue.present()) {
|
||||
auto const quotaType = parseQuotaType(tokens[3]);
|
||||
auto const quotaValue = parseQuotaValue(tokens[4]);
|
||||
if (!quotaType.present() || !quotaValue.present()) {
|
||||
return exitFailure();
|
||||
}
|
||||
wait(setQuota(db, tag.get(), limitType.get(), limitValue.get()));
|
||||
wait(setQuota(db, tag.get(), quotaType.get(), quotaValue.get()));
|
||||
return true;
|
||||
} else if (tokens[1] == "clear"_sr) {
|
||||
if (tokens.size() != 3) {
|
||||
|
|
|
@ -137,6 +137,11 @@ def quota(logger):
|
|||
logger.debug(command + ' : ' + output)
|
||||
assert output == 'Successfully updated quota.'
|
||||
|
||||
command = 'quota set green storage 98765'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == 'Successfully updated quota.'
|
||||
|
||||
command = 'quota get green total_throughput'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
|
@ -147,6 +152,11 @@ def quota(logger):
|
|||
logger.debug(command + ' : ' + output)
|
||||
assert output == '16384'
|
||||
|
||||
command = 'quota get green storage'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == '98765'
|
||||
|
||||
command = 'quota clear green'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
|
@ -157,6 +167,11 @@ def quota(logger):
|
|||
logger.debug(command + ' : ' + output)
|
||||
assert output == '<empty>'
|
||||
|
||||
command = 'quota get green storage'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == '<empty>'
|
||||
|
||||
# Too few arguments, should log help message
|
||||
command = 'quota get green'
|
||||
output = run_fdbcli_command(command)
|
||||
|
|
|
@ -971,6 +971,11 @@ void sortDeltasByKey(const Standalone<GranuleDeltas>& deltasByVersion,
|
|||
// clearVersion as previous guy)
|
||||
}
|
||||
|
||||
// Two-argument overload: forwards to the three-argument sortDeltasByKey using
// a local SortedDeltasT as the output. The sorted result is a local variable
// and is not returned to the caller.
// The header declaration's comment marks this overload as for benchmark
// testing only.
void sortDeltasByKey(const Standalone<GranuleDeltas>& deltasByVersion, const KeyRangeRef& fileRange) {
|
||||
SortedDeltasT deltasByKey;
|
||||
sortDeltasByKey(deltasByVersion, fileRange, deltasByKey);
|
||||
}
|
||||
|
||||
// FIXME: Could maybe reduce duplicated code between this and chunkedSnapshot for chunking
|
||||
Value serializeChunkedDeltaFile(const Standalone<StringRef>& fileNameRef,
|
||||
const Standalone<GranuleDeltas>& deltas,
|
||||
|
|
|
@ -5924,7 +5924,6 @@ public:
|
|||
printf("Restoring backup to version: %lld\n", (long long)targetVersion);
|
||||
}
|
||||
|
||||
state int retryCount = 0;
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
loop {
|
||||
try {
|
||||
|
@ -5948,17 +5947,9 @@ public:
|
|||
wait(tr->commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_transaction_too_old) {
|
||||
retryCount++;
|
||||
}
|
||||
if (e.code() == error_code_restore_duplicate_tag) {
|
||||
throw;
|
||||
}
|
||||
if (g_network->isSimulated() && retryCount > 50) {
|
||||
CODE_PROBE(true, "submitRestore simulation speedup");
|
||||
// try to make the read window back to normal size (5 * version_per_sec)
|
||||
g_simulator->speedUpSimulation = true;
|
||||
}
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2145,6 +2145,9 @@ ACTOR Future<Void> lockDatabase(Reference<ReadYourWritesTransaction> tr, UID id)
|
|||
|
||||
ACTOR Future<Void> lockDatabase(Database cx, UID id) {
|
||||
state Transaction tr(cx);
|
||||
UID debugID = deterministicRandom()->randomUniqueID();
|
||||
TraceEvent("LockDatabaseTransaction", debugID).log();
|
||||
tr.debugTransaction(debugID);
|
||||
loop {
|
||||
try {
|
||||
wait(lockDatabase(&tr, id));
|
||||
|
|
|
@ -965,7 +965,8 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
|||
allConnectionsFailed = false;
|
||||
} else {
|
||||
CODE_PROBE(rep.getError().code() == error_code_failed_to_progress,
|
||||
"Coordinator cant talk to cluster controller");
|
||||
"Coordinator cant talk to cluster controller",
|
||||
probe::decoration::rare);
|
||||
TraceEvent("MonitorProxiesConnectFailed")
|
||||
.detail("Error", rep.getError().name())
|
||||
.detail("Coordinator", clientLeaderServer.getAddressString());
|
||||
|
|
|
@ -2175,7 +2175,7 @@ void DatabaseContext::removeWatch() {
|
|||
ASSERT(outstandingWatches >= 0);
|
||||
}
|
||||
|
||||
// Returns `connected`. Per the comment on the declaration, the future is ready
// once a majority of coordination servers are available and have reported a
// leader; the database itself may still be unavailable.
// NOTE(review): the non-const and const signature lines are both present,
// which looks like the before/after lines of a rendered diff -- confirm.
Future<Void> DatabaseContext::onConnected() {
|
||||
Future<Void> DatabaseContext::onConnected() const {
|
||||
return connected;
|
||||
}
|
||||
|
||||
|
@ -2802,26 +2802,26 @@ void GetRangeLimits::decrement(MappedKeyValueRef const& data) {
|
|||
}
|
||||
|
||||
// True if either the row or byte limit has been reached
|
||||
// Reached when rows == 0, or when bytes == 0 and minRows == 0; a zero byte
// budget alone does not count while minRows is still non-zero.
// NOTE(review): the non-const and const signature lines are both present,
// which looks like the before/after lines of a rendered diff -- confirm.
bool GetRangeLimits::isReached() {
|
||||
bool GetRangeLimits::isReached() const {
|
||||
return rows == 0 || (bytes == 0 && minRows == 0);
|
||||
}
|
||||
|
||||
// True if data would cause the row or byte limit to be reached
|
||||
// Row check: applies only when rows is not ROW_LIMIT_UNLIMITED, and trips when
// data.size() >= rows.
// Byte check: applies only when bytes is not BYTE_LIMIT_UNLIMITED, and trips
// when the adjusted size estimate is >= bytes AND data.size() >= minRows.
// NOTE(review): the (8 - sizeof(KeyValueRef)) * data.size() term adjusts
// expectedSize() per element; presumably it swaps the in-memory struct size
// for an 8-byte per-pair overhead -- confirm the intent of the constant 8.
// NOTE(review): the non-const and const signature lines are both present,
// which looks like the before/after lines of a rendered diff -- confirm.
bool GetRangeLimits::reachedBy(VectorRef<KeyValueRef> const& data) {
|
||||
bool GetRangeLimits::reachedBy(VectorRef<KeyValueRef> const& data) const {
|
||||
return (rows != GetRangeLimits::ROW_LIMIT_UNLIMITED && data.size() >= rows) ||
|
||||
(bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED &&
|
||||
(int)data.expectedSize() + (8 - (int)sizeof(KeyValueRef)) * data.size() >= bytes && data.size() >= minRows);
|
||||
}
|
||||
|
||||
// True when bytes holds anything other than the BYTE_LIMIT_UNLIMITED sentinel.
// NOTE(review): the non-const and const signature lines are both present,
// which looks like the before/after lines of a rendered diff -- confirm.
bool GetRangeLimits::hasByteLimit() {
|
||||
bool GetRangeLimits::hasByteLimit() const {
|
||||
return bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED;
|
||||
}
|
||||
|
||||
// True when rows holds anything other than the ROW_LIMIT_UNLIMITED sentinel.
// NOTE(review): the non-const and const signature lines are both present,
// which looks like the before/after lines of a rendered diff -- confirm.
bool GetRangeLimits::hasRowLimit() {
|
||||
bool GetRangeLimits::hasRowLimit() const {
|
||||
return rows != GetRangeLimits::ROW_LIMIT_UNLIMITED;
|
||||
}
|
||||
|
||||
// True only when a byte limit is set (hasByteLimit()) and minRows is 0; with
// no byte limit this always returns false.
// NOTE(review): the non-const and const signature lines are both present,
// which looks like the before/after lines of a rendered diff -- confirm.
bool GetRangeLimits::hasSatisfiedMinRows() {
|
||||
bool GetRangeLimits::hasSatisfiedMinRows() const {
|
||||
return hasByteLimit() && minRows == 0;
|
||||
}
|
||||
|
||||
|
@ -4771,7 +4771,8 @@ static Future<Void> tssStreamComparison(Request request,
|
|||
TSS_traceMismatch(mismatchEvent, request, ssReply.get(), tssReply.get());
|
||||
|
||||
CODE_PROBE(FLOW_KNOBS->LOAD_BALANCE_TSS_MISMATCH_TRACE_FULL,
|
||||
"Tracing Full TSS Mismatch in stream comparison");
|
||||
"Tracing Full TSS Mismatch in stream comparison",
|
||||
probe::decoration::rare);
|
||||
CODE_PROBE(!FLOW_KNOBS->LOAD_BALANCE_TSS_MISMATCH_TRACE_FULL,
|
||||
"Tracing Partial TSS Mismatch in stream comparison and storing the rest in FDB");
|
||||
|
||||
|
@ -4813,7 +4814,7 @@ maybeDuplicateTSSStreamFragment(Request& req, QueueModel* model, RequestStream<R
|
|||
Optional<TSSEndpointData> tssData = model->getTssData(ssStream->getEndpoint().token.first());
|
||||
|
||||
if (tssData.present()) {
|
||||
CODE_PROBE(true, "duplicating stream to TSS");
|
||||
CODE_PROBE(true, "duplicating stream to TSS", probe::decoration::rare);
|
||||
resetReply(req);
|
||||
// FIXME: optimize to avoid creating new netNotifiedQueueWithAcknowledgements for each stream duplication
|
||||
RequestStream<Request> tssRequestStream(tssData.get().endpoint);
|
||||
|
@ -5952,6 +5953,7 @@ void TransactionOptions::clear() {
|
|||
useGrvCache = false;
|
||||
skipGrvCache = false;
|
||||
rawAccess = false;
|
||||
bypassStorageQuota = false;
|
||||
}
|
||||
|
||||
TransactionOptions::TransactionOptions() {
|
||||
|
@ -6693,6 +6695,9 @@ Future<Void> Transaction::commitMutations() {
|
|||
if (trState->options.firstInBatch) {
|
||||
tr.flags = tr.flags | CommitTransactionRequest::FLAG_FIRST_IN_BATCH;
|
||||
}
|
||||
if (trState->options.bypassStorageQuota) {
|
||||
tr.flags = tr.flags | CommitTransactionRequest::FLAG_BYPASS_STORAGE_QUOTA;
|
||||
}
|
||||
if (trState->options.reportConflictingKeys) {
|
||||
tr.transaction.report_conflicting_keys = true;
|
||||
}
|
||||
|
@ -6971,6 +6976,10 @@ void Transaction::setOption(FDBTransactionOptions::Option option, Optional<Strin
|
|||
trState->options.rawAccess = true;
|
||||
break;
|
||||
|
||||
case FDBTransactionOptions::BYPASS_STORAGE_QUOTA:
|
||||
trState->options.bypassStorageQuota = true;
|
||||
break;
|
||||
|
||||
case FDBTransactionOptions::AUTHORIZATION_TOKEN:
|
||||
if (value.present())
|
||||
trState->authToken = Standalone<StringRef>(value.get());
|
||||
|
@ -9406,7 +9415,8 @@ void handleTSSChangeFeedMismatch(const ChangeFeedStreamRequest& request,
|
|||
mismatchEvent.detail("TSSVersion", tssVersion);
|
||||
|
||||
CODE_PROBE(FLOW_KNOBS->LOAD_BALANCE_TSS_MISMATCH_TRACE_FULL,
|
||||
"Tracing Full TSS Feed Mismatch in stream comparison");
|
||||
"Tracing Full TSS Feed Mismatch in stream comparison",
|
||||
probe::decoration::rare);
|
||||
CODE_PROBE(!FLOW_KNOBS->LOAD_BALANCE_TSS_MISMATCH_TRACE_FULL,
|
||||
"Tracing Partial TSS Feed Mismatch in stream comparison and storing the rest in FDB");
|
||||
|
||||
|
|
|
@ -1654,7 +1654,7 @@ Future<RangeResult> ReadYourWritesTransaction::getRange(KeySelector begin,
|
|||
|
||||
// This optimization prevents null (no-op) operations from being added to the conflict range
|
||||
if (limits.isReached()) {
|
||||
CODE_PROBE(true, "RYW range read limit 0", probe::decoration::rare);
|
||||
CODE_PROBE(true, "RYW range read limit 0");
|
||||
return RangeResult();
|
||||
}
|
||||
|
||||
|
@ -1668,7 +1668,7 @@ Future<RangeResult> ReadYourWritesTransaction::getRange(KeySelector begin,
|
|||
end.removeOrEqual(end.arena());
|
||||
|
||||
if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) {
|
||||
CODE_PROBE(true, "RYW range inverted", probe::decoration::rare);
|
||||
CODE_PROBE(true, "RYW range inverted");
|
||||
return RangeResult();
|
||||
}
|
||||
|
||||
|
@ -1698,7 +1698,7 @@ Future<MappedRangeResult> ReadYourWritesTransaction::getMappedRange(KeySelector
|
|||
if (getDatabase()->apiVersionAtLeast(630)) {
|
||||
if (specialKeys.contains(begin.getKey()) && specialKeys.begin <= end.getKey() &&
|
||||
end.getKey() <= specialKeys.end) {
|
||||
CODE_PROBE(true, "Special key space get range (getMappedRange)");
|
||||
CODE_PROBE(true, "Special key space get range (getMappedRange)", probe::decoration::rare);
|
||||
throw client_invalid_operation(); // Not support special keys.
|
||||
}
|
||||
} else {
|
||||
|
@ -1720,7 +1720,7 @@ Future<MappedRangeResult> ReadYourWritesTransaction::getMappedRange(KeySelector
|
|||
|
||||
// This optimization prevents null (no-op) operations from being added to the conflict range
|
||||
if (limits.isReached()) {
|
||||
CODE_PROBE(true, "RYW range read limit 0 (getMappedRange)");
|
||||
CODE_PROBE(true, "RYW range read limit 0 (getMappedRange)", probe::decoration::rare);
|
||||
return MappedRangeResult();
|
||||
}
|
||||
|
||||
|
@ -1734,7 +1734,7 @@ Future<MappedRangeResult> ReadYourWritesTransaction::getMappedRange(KeySelector
|
|||
end.removeOrEqual(end.arena());
|
||||
|
||||
if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) {
|
||||
CODE_PROBE(true, "RYW range inverted (getMappedRange)");
|
||||
CODE_PROBE(true, "RYW range inverted (getMappedRange)", probe::decoration::rare);
|
||||
return MappedRangeResult();
|
||||
}
|
||||
|
||||
|
|
|
@ -821,10 +821,14 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( QUICK_GET_KEY_VALUES_LIMIT, 2000 );
|
||||
init( QUICK_GET_KEY_VALUES_LIMIT_BYTES, 1e7 );
|
||||
init( STORAGE_FEED_QUERY_HARD_LIMIT, 100000 );
|
||||
// Read priority definitions in the form of a list of their relative concurrency share weights
|
||||
init( STORAGESERVER_READ_PRIORITIES, "120,10,20,40,60" );
|
||||
// The total concurrency which will be shared by active priorities according to their relative weights
|
||||
init( STORAGE_SERVER_READ_CONCURRENCY, 70 );
|
||||
// Priorities which each ReadType maps to, in enumeration order
|
||||
init( STORAGESERVER_READ_RANKS, "0,2,1,1,1" );
|
||||
init( STORAGESERVER_READ_PRIORITIES, "48,32,8" );
|
||||
// The priority number which each ReadType maps to in enumeration order
|
||||
// This exists for flexibility but assigning each ReadType to its own unique priority number makes the most sense
|
||||
// The enumeration is currently: eager, fetch, low, normal, high
|
||||
init( STORAGESERVER_READTYPE_PRIORITY_MAP, "0,1,2,3,4" );
|
||||
|
||||
//Wait Failure
|
||||
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
|
||||
|
@ -948,7 +952,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 );
|
||||
init( REDWOOD_EVICT_UPDATED_PAGES, true ); if( randomize && BUGGIFY ) { REDWOOD_EVICT_UPDATED_PAGES = false; }
|
||||
init( REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT, 2 ); if( randomize && BUGGIFY ) { REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT = deterministicRandom()->randomInt(1, 7); }
|
||||
init( REDWOOD_PRIORITY_LAUNCHS, "32,32,32,32" );
|
||||
init( REDWOOD_IO_PRIORITIES, "32,32,32,32" );
|
||||
init( REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT, false );
|
||||
|
||||
// Server request latency measurement
|
||||
|
@ -1022,6 +1026,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( BLOB_MANIFEST_BACKUP_INTERVAL, isSimulated ? 5.0 : 30.0 );
|
||||
init( BLOB_FULL_RESTORE_MODE, false );
|
||||
init( BLOB_MIGRATOR_CHECK_INTERVAL, isSimulated ? 1.0 : 5.0);
|
||||
init( BLOB_MANIFEST_RW_ROWS, isSimulated ? 10 : 1000);
|
||||
|
||||
init( BGCC_TIMEOUT, isSimulated ? 10.0 : 120.0 );
|
||||
init( BGCC_MIN_INTERVAL, isSimulated ? 1.0 : 10.0 );
|
||||
|
|
|
@ -56,4 +56,7 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
|
|||
|
||||
std::string randomBGFilename(UID blobWorkerID, UID granuleID, Version version, std::string suffix);
|
||||
|
||||
#endif
|
||||
// For benchmark testing only. It should never be called in prod.
|
||||
void sortDeltasByKey(const Standalone<GranuleDeltas>& deltasByVersion, const KeyRangeRef& fileRange);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -196,10 +196,11 @@ struct CommitID {
|
|||
|
||||
struct CommitTransactionRequest : TimedRequest {
|
||||
constexpr static FileIdentifier file_identifier = 93948;
|
||||
enum { FLAG_IS_LOCK_AWARE = 0x1, FLAG_FIRST_IN_BATCH = 0x2 };
|
||||
enum { FLAG_IS_LOCK_AWARE = 0x1, FLAG_FIRST_IN_BATCH = 0x2, FLAG_BYPASS_STORAGE_QUOTA = 0x4 };
|
||||
|
||||
bool isLockAware() const { return (flags & FLAG_IS_LOCK_AWARE) != 0; }
|
||||
bool firstInBatch() const { return (flags & FLAG_FIRST_IN_BATCH) != 0; }
|
||||
bool bypassStorageQuota() const { return (flags & FLAG_BYPASS_STORAGE_QUOTA) != 0; }
|
||||
|
||||
Arena arena;
|
||||
SpanContext spanContext;
|
||||
|
|
|
@ -353,8 +353,9 @@ public:
|
|||
|
||||
int apiVersionAtLeast(int minVersion) const { return apiVersion.version() >= minVersion; }
|
||||
|
||||
Future<Void> onConnected(); // Returns after a majority of coordination servers are available and have reported a
|
||||
// leader. The cluster file therefore is valid, but the database might be unavailable.
|
||||
Future<Void> onConnected()
|
||||
const; // Returns after a majority of coordination servers are available and have reported a
|
||||
// leader. The cluster file therefore is valid, but the database might be unavailable.
|
||||
Reference<IClusterConnectionRecord> getConnectionRecord();
|
||||
|
||||
// Switch the database to use the new connection file, and recreate all pending watches for committed transactions.
|
||||
|
|
|
@ -706,15 +706,15 @@ struct GetRangeLimits {
|
|||
void decrement(MappedKeyValueRef const& data);
|
||||
|
||||
// True if either the row or byte limit has been reached
|
||||
bool isReached();
|
||||
bool isReached() const;
|
||||
|
||||
// True if data would cause the row or byte limit to be reached
|
||||
bool reachedBy(VectorRef<KeyValueRef> const& data);
|
||||
bool reachedBy(VectorRef<KeyValueRef> const& data) const;
|
||||
|
||||
bool hasByteLimit();
|
||||
bool hasRowLimit();
|
||||
bool hasByteLimit() const;
|
||||
bool hasRowLimit() const;
|
||||
|
||||
bool hasSatisfiedMinRows();
|
||||
bool hasSatisfiedMinRows() const;
|
||||
bool isValid() const {
|
||||
return (rows >= 0 || rows == ROW_LIMIT_UNLIMITED) && (bytes >= 0 || bytes == BYTE_LIMIT_UNLIMITED) &&
|
||||
minRows >= 0 && (minRows <= rows || rows == ROW_LIMIT_UNLIMITED);
|
||||
|
|
|
@ -161,6 +161,7 @@ struct TransactionOptions {
|
|||
bool useGrvCache : 1;
|
||||
bool skipGrvCache : 1;
|
||||
bool rawAccess : 1;
|
||||
bool bypassStorageQuota : 1;
|
||||
|
||||
TransactionPriority priority;
|
||||
|
||||
|
|
|
@ -772,9 +772,9 @@ public:
|
|||
int QUICK_GET_KEY_VALUES_LIMIT;
|
||||
int QUICK_GET_KEY_VALUES_LIMIT_BYTES;
|
||||
int STORAGE_FEED_QUERY_HARD_LIMIT;
|
||||
int STORAGE_SERVER_READ_CONCURRENCY;
|
||||
std::string STORAGESERVER_READ_RANKS;
|
||||
std::string STORAGESERVER_READ_PRIORITIES;
|
||||
int STORAGE_SERVER_READ_CONCURRENCY;
|
||||
std::string STORAGESERVER_READTYPE_PRIORITY_MAP;
|
||||
|
||||
// Wait Failure
|
||||
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
|
||||
|
@ -923,7 +923,7 @@ public:
|
|||
int REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT; // Minimum height for which to keep and reuse page decode caches
|
||||
bool REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT; // Whether to split pages by tenant if encryption is enabled
|
||||
|
||||
std::string REDWOOD_PRIORITY_LAUNCHS;
|
||||
std::string REDWOOD_IO_PRIORITIES;
|
||||
|
||||
// Server request latency measurement
|
||||
int LATENCY_SAMPLE_SIZE;
|
||||
|
@ -998,6 +998,7 @@ public:
|
|||
double BLOB_MANIFEST_BACKUP_INTERVAL;
|
||||
bool BLOB_FULL_RESTORE_MODE;
|
||||
double BLOB_MIGRATOR_CHECK_INTERVAL;
|
||||
int BLOB_MANIFEST_RW_ROWS;
|
||||
|
||||
// Blob metadata
|
||||
int64_t BLOB_METADATA_CACHE_TTL;
|
||||
|
|
|
@ -253,6 +253,8 @@ description is not currently required but encouraged.
|
|||
description="Allows this transaction to read system keys (those that start with the byte 0xFF). Implies raw_access."/>
|
||||
<Option name="raw_access" code="303"
|
||||
description="Allows this transaction to access the raw key-space when tenant mode is on."/>
|
||||
<Option name="bypass_storage_quota" code="304"
|
||||
description="Allows this transaction to bypass storage quota enforcement. Should only be used for transactions that directly or indirectly decrease the size of the tenant group's data."/>
|
||||
<Option name="debug_dump" code="400"
|
||||
hidden="true" />
|
||||
<Option name="debug_retry_logging" code="401" paramType="String" paramDescription="Optional transaction name" />
|
||||
|
|
|
@ -155,7 +155,15 @@ Future<Void> SimpleFailureMonitor::onDisconnectOrFailure(Endpoint const& endpoin
|
|||
// If the endpoint or address is already failed, return right away
|
||||
auto i = addressStatus.find(endpoint.getPrimaryAddress());
|
||||
if (i == addressStatus.end() || i->second.isFailed() || failedEndpoints.count(endpoint)) {
|
||||
TraceEvent("AlreadyDisconnected").detail("Addr", endpoint.getPrimaryAddress()).detail("Tok", endpoint.token);
|
||||
TraceEvent event("AlreadyDisconnected");
|
||||
if (endpoint.token.first() == 0xffffffffffffffff) {
|
||||
// well known endpoint
|
||||
event.suppressFor(5.0);
|
||||
}
|
||||
event.detail("Addr", endpoint.getPrimaryAddress())
|
||||
.detail("Reason", i == addressStatus.end() || i->second.isFailed() ? "Disconnected" : "EndpointFailed")
|
||||
.detail("Tok", endpoint.token)
|
||||
.log();
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ TEST_CASE("/flow/buggifiedDelay") {
|
|||
});
|
||||
wait(f1 && f2);
|
||||
if (last == 1) {
|
||||
CODE_PROBE(true, "Delays can become ready out of order");
|
||||
CODE_PROBE(true, "Delays can become ready out of order", probe::decoration::rare);
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -216,7 +216,7 @@ bool TokenCacheImpl::validateAndAdd(double currentTime, StringRef token, Network
|
|||
Arena arena;
|
||||
authz::jwt::TokenRef t;
|
||||
if (!authz::jwt::parseToken(arena, t, token)) {
|
||||
CODE_PROBE(true, "Token can't be parsed");
|
||||
CODE_PROBE(true, "Token can't be parsed", probe::decoration::rare);
|
||||
TraceEvent(SevWarn, "InvalidToken")
|
||||
.detail("From", peer)
|
||||
.detail("Reason", "ParseError")
|
||||
|
@ -225,35 +225,35 @@ bool TokenCacheImpl::validateAndAdd(double currentTime, StringRef token, Network
|
|||
}
|
||||
auto key = FlowTransport::transport().getPublicKeyByName(t.keyId);
|
||||
if (!key.present()) {
|
||||
CODE_PROBE(true, "Token referencing non-existing key");
|
||||
CODE_PROBE(true, "Token referencing non-existing key", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("UnknownKey", t);
|
||||
return false;
|
||||
} else if (!t.issuedAtUnixTime.present()) {
|
||||
CODE_PROBE(true, "Token has no issued-at field");
|
||||
CODE_PROBE(true, "Token has no issued-at field", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("NoIssuedAt", t);
|
||||
return false;
|
||||
} else if (!t.expiresAtUnixTime.present()) {
|
||||
CODE_PROBE(true, "Token has no expiration time");
|
||||
CODE_PROBE(true, "Token has no expiration time", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("NoExpirationTime", t);
|
||||
return false;
|
||||
} else if (double(t.expiresAtUnixTime.get()) <= currentTime) {
|
||||
CODE_PROBE(true, "Expired token");
|
||||
CODE_PROBE(true, "Expired token", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("Expired", t);
|
||||
return false;
|
||||
} else if (!t.notBeforeUnixTime.present()) {
|
||||
CODE_PROBE(true, "Token has no not-before field");
|
||||
CODE_PROBE(true, "Token has no not-before field", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("NoNotBefore", t);
|
||||
return false;
|
||||
} else if (double(t.notBeforeUnixTime.get()) > currentTime) {
|
||||
CODE_PROBE(true, "Tokens not-before is in the future");
|
||||
CODE_PROBE(true, "Tokens not-before is in the future", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("TokenNotYetValid", t);
|
||||
return false;
|
||||
} else if (!t.tenants.present()) {
|
||||
CODE_PROBE(true, "Token with no tenants");
|
||||
CODE_PROBE(true, "Token with no tenants", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("NoTenants", t);
|
||||
return false;
|
||||
} else if (!authz::jwt::verifyToken(token, key.get())) {
|
||||
CODE_PROBE(true, "Token with invalid signature");
|
||||
CODE_PROBE(true, "Token with invalid signature", probe::decoration::rare);
|
||||
TRACE_INVALID_PARSED_TOKEN("InvalidSignature", t);
|
||||
return false;
|
||||
} else {
|
||||
|
@ -300,7 +300,7 @@ bool TokenCacheImpl::validate(TenantNameRef name, StringRef token) {
|
|||
}
|
||||
}
|
||||
if (!tenantFound) {
|
||||
CODE_PROBE(true, "Valid token doesn't reference tenant");
|
||||
CODE_PROBE(true, "Valid token doesn't reference tenant", probe::decoration::rare);
|
||||
TraceEvent(SevWarn, "TenantTokenMismatch").detail("From", peer).detail("Tenant", name.toString());
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "flow/IAsyncFile.h"
|
||||
#include "flow/network.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
|
||||
// template <class AsyncFileType>
|
||||
class AsyncFileChaos final : public IAsyncFile, public ReferenceCounted<AsyncFileChaos> {
|
||||
|
@ -35,7 +36,8 @@ private:
|
|||
public:
|
||||
// Wraps `file` and decides once, from its filename, whether chaos injection is
// enabled for this wrapper.
// NOTE(review): two `enabled` assignments are present, which looks like the
// before/after lines of a rendered diff -- confirm. The second one also
// requires that the filename does not contain "sqlite-wal".
explicit AsyncFileChaos(Reference<IAsyncFile> file) : file(file) {
|
||||
// We only allow chaos events on storage files
|
||||
enabled = (file->getFilename().find("storage-") != std::string::npos);
|
||||
enabled = file->getFilename().find("storage-") != std::string::npos &&
|
||||
file->getFilename().find("sqlite-wal") == std::string::npos;
|
||||
}
|
||||
|
||||
void addref() override { ReferenceCounted<AsyncFileChaos>::addref(); }
|
||||
|
@ -79,6 +81,7 @@ public:
|
|||
Future<Void> write(void const* data, int length, int64_t offset) override {
|
||||
Arena arena;
|
||||
char* pdata = nullptr;
|
||||
unsigned corruptedBlock = 0;
|
||||
|
||||
// Check if a bit flip event was injected, if so, copy the buffer contents
|
||||
// with a random bit flipped in a new buffer and use that for the write
|
||||
|
@ -91,32 +94,38 @@ public:
|
|||
pdata = (char*)arena.allocate4kAlignedBuffer(length);
|
||||
memcpy(pdata, data, length);
|
||||
// flip a random bit in the copied buffer
|
||||
pdata[deterministicRandom()->randomInt(0, length)] ^= (1 << deterministicRandom()->randomInt(0, 8));
|
||||
auto corruptedPos = deterministicRandom()->randomInt(0, length);
|
||||
pdata[corruptedPos] ^= (1 << deterministicRandom()->randomInt(0, 8));
|
||||
// mark the block as corrupted
|
||||
corruptedBlock = (offset + corruptedPos) / 4096;
|
||||
TraceEvent("CorruptedBlock")
|
||||
.detail("Filename", file->getFilename())
|
||||
.detail("Block", corruptedBlock)
|
||||
.log();
|
||||
|
||||
// increment the metric for bit flips
|
||||
auto res = g_network->global(INetwork::enChaosMetrics);
|
||||
if (res) {
|
||||
ChaosMetrics* chaosMetrics = static_cast<ChaosMetrics*>(res);
|
||||
auto chaosMetricsPointer = g_network->global(INetwork::enChaosMetrics);
|
||||
if (chaosMetricsPointer) {
|
||||
ChaosMetrics* chaosMetrics = static_cast<ChaosMetrics*>(chaosMetricsPointer);
|
||||
chaosMetrics->bitFlips++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double diskDelay = getDelay();
|
||||
if (diskDelay == 0.0) {
|
||||
if (pdata)
|
||||
return holdWhile(arena, file->write(pdata, length, offset));
|
||||
|
||||
return file->write(data, length, offset);
|
||||
}
|
||||
|
||||
// Wait for diskDelay before submitting the I/O
|
||||
// Capture file by value in case this is destroyed during the delay
|
||||
return mapAsync<Void, std::function<Future<Void>(Void)>, Void>(
|
||||
delay(diskDelay), [=, file = file](Void _) -> Future<Void> {
|
||||
if (pdata)
|
||||
return holdWhile(arena, file->write(pdata, length, offset));
|
||||
delay(getDelay()), [=, file = file](Void _) -> Future<Void> {
|
||||
if (pdata) {
|
||||
return map(holdWhile(arena, file->write(pdata, length, offset)),
|
||||
[corruptedBlock, file = file](auto res) {
|
||||
if (g_network->isSimulated()) {
|
||||
g_simulator->corruptedBlocks.emplace(file->getFilename(), corruptedBlock);
|
||||
}
|
||||
return res;
|
||||
});
|
||||
}
|
||||
|
||||
return file->write(data, length, offset);
|
||||
});
|
||||
|
@ -130,7 +139,16 @@ public:
|
|||
// Wait for diskDelay before submitting the I/O
|
||||
// Capture file by value in case this is destroyed during the delay
|
||||
return mapAsync<Void, std::function<Future<Void>(Void)>, Void>(
|
||||
delay(diskDelay), [=, file = file](Void _) -> Future<Void> { return file->truncate(size); });
|
||||
delay(diskDelay), [size, file = file](Void _) -> Future<Void> {
|
||||
constexpr auto maxBlockValue =
|
||||
std::numeric_limits<decltype(g_simulator->corruptedBlocks)::key_type::second_type>::max();
|
||||
auto firstDeletedBlock =
|
||||
g_simulator->corruptedBlocks.lower_bound(std::make_pair(file->getFilename(), size / 4096));
|
||||
auto lastFileBlock =
|
||||
g_simulator->corruptedBlocks.upper_bound(std::make_pair(file->getFilename(), maxBlockValue));
|
||||
g_simulator->corruptedBlocks.erase(firstDeletedBlock, lastFileBlock);
|
||||
return file->truncate(size);
|
||||
});
|
||||
}
|
||||
|
||||
Future<Void> sync() override {
|
||||
|
|
|
@ -140,7 +140,7 @@ public:
|
|||
sav->addFutureRef();
|
||||
return Future<T>(sav);
|
||||
}
|
||||
bool isSet() { return sav->isSet(); }
|
||||
bool isSet() const { return sav->isSet(); }
|
||||
bool isValid() const { return sav != nullptr; }
|
||||
ReplyPromise() : sav(new NetSAV<T>(0, 1)) {}
|
||||
explicit ReplyPromise(const PeerCompatibilityPolicy& policy) : ReplyPromise() {
|
||||
|
@ -515,7 +515,7 @@ public:
|
|||
|
||||
void setRequestStreamEndpoint(const Endpoint& endpoint) { queue->requestStreamEndpoint = endpoint; }
|
||||
|
||||
bool connected() { return queue->acknowledgements.getRawEndpoint().isValid() || queue->error.isValid(); }
|
||||
bool connected() const { return queue->acknowledgements.getRawEndpoint().isValid() || queue->error.isValid(); }
|
||||
|
||||
Future<Void> onConnected() {
|
||||
if (connected()) {
|
||||
|
|
|
@ -133,7 +133,7 @@ Future<REPLY_TYPE(Req)> retryGetReplyFromHostname(Req request, Hostname hostname
|
|||
// Like tryGetReplyFromHostname, except that request_maybe_delivered results in re-resolving the hostname.
|
||||
// Suitable for use with hostname, where RequestStream is NOT initialized yet.
|
||||
// Not normally useful for endpoints initialized with NetworkAddress.
|
||||
state double reconnetInterval = FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL;
|
||||
state double reconnectInterval = FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL;
|
||||
state std::unique_ptr<RequestStream<Req>> to;
|
||||
loop {
|
||||
NetworkAddress address = wait(hostname.resolveWithRetry());
|
||||
|
@ -145,8 +145,8 @@ Future<REPLY_TYPE(Req)> retryGetReplyFromHostname(Req request, Hostname hostname
|
|||
resetReply(request);
|
||||
if (reply.getError().code() == error_code_request_maybe_delivered) {
|
||||
// Connection failure.
|
||||
wait(delay(reconnetInterval));
|
||||
reconnetInterval = std::min(2 * reconnetInterval, FLOW_KNOBS->HOSTNAME_RECONNECT_MAX_INTERVAL);
|
||||
wait(delay(reconnectInterval));
|
||||
reconnectInterval = std::min(2 * reconnectInterval, FLOW_KNOBS->HOSTNAME_RECONNECT_MAX_INTERVAL);
|
||||
INetworkConnections::net()->removeCachedDNS(hostname.host, hostname.service);
|
||||
} else {
|
||||
throw reply.getError();
|
||||
|
@ -165,7 +165,7 @@ Future<REPLY_TYPE(Req)> retryGetReplyFromHostname(Req request,
|
|||
// Like tryGetReplyFromHostname, except that request_maybe_delivered results in re-resolving the hostname.
|
||||
// Suitable for use with hostname, where RequestStream is NOT initialized yet.
|
||||
// Not normally useful for endpoints initialized with NetworkAddress.
|
||||
state double reconnetInterval = FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL;
|
||||
state double reconnectInitInterval = FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL;
|
||||
state std::unique_ptr<RequestStream<Req>> to;
|
||||
loop {
|
||||
NetworkAddress address = wait(hostname.resolveWithRetry());
|
||||
|
@ -177,8 +177,9 @@ Future<REPLY_TYPE(Req)> retryGetReplyFromHostname(Req request,
|
|||
resetReply(request);
|
||||
if (reply.getError().code() == error_code_request_maybe_delivered) {
|
||||
// Connection failure.
|
||||
wait(delay(reconnetInterval));
|
||||
reconnetInterval = std::min(2 * reconnetInterval, FLOW_KNOBS->HOSTNAME_RECONNECT_MAX_INTERVAL);
|
||||
wait(delay(reconnectInitInterval));
|
||||
reconnectInitInterval =
|
||||
std::min(2 * reconnectInitInterval, FLOW_KNOBS->HOSTNAME_RECONNECT_MAX_INTERVAL);
|
||||
INetworkConnections::net()->removeCachedDNS(hostname.host, hostname.service);
|
||||
} else {
|
||||
throw reply.getError();
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
#include <random>
|
||||
#include <limits>
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include "flow/Histogram.h"
|
||||
#include "flow/ProtocolVersion.h"
|
||||
|
@ -520,6 +522,8 @@ public:
|
|||
|
||||
std::unordered_map<Standalone<StringRef>, PrivateKey> authKeys;
|
||||
|
||||
std::set<std::pair<std::string, unsigned>> corruptedBlocks;
|
||||
|
||||
flowGlobalType global(int id) const final { return getCurrentProcess()->global(id); };
|
||||
void setGlobal(size_t id, flowGlobalType v) final { getCurrentProcess()->setGlobal(id, v); };
|
||||
|
||||
|
|
|
@ -784,11 +784,25 @@ private:
|
|||
std::string sourceFilename = self->filename + ".part";
|
||||
|
||||
if (machineCache.count(sourceFilename)) {
|
||||
// it seems gcc has some trouble with these types. Aliasing with typename is ugly, but seems to work.
|
||||
using block_value_type = typename decltype(g_simulator->corruptedBlocks)::key_type::second_type;
|
||||
TraceEvent("SimpleFileRename")
|
||||
.detail("From", sourceFilename)
|
||||
.detail("To", self->filename)
|
||||
.detail("SourceCount", machineCache.count(sourceFilename))
|
||||
.detail("FileCount", machineCache.count(self->filename));
|
||||
auto maxBlockValue = std::numeric_limits<block_value_type>::max();
|
||||
g_simulator->corruptedBlocks.erase(
|
||||
g_simulator->corruptedBlocks.lower_bound(std::make_pair(sourceFilename, 0u)),
|
||||
g_simulator->corruptedBlocks.upper_bound(std::make_pair(self->filename, maxBlockValue)));
|
||||
// next we need to rename all files. In practice, the number of corruptions for a given file should be
|
||||
// very small
|
||||
auto begin = g_simulator->corruptedBlocks.lower_bound(std::make_pair(sourceFilename, 0u)),
|
||||
end = g_simulator->corruptedBlocks.upper_bound(std::make_pair(sourceFilename, maxBlockValue));
|
||||
for (auto iter = begin; iter != end; ++iter) {
|
||||
g_simulator->corruptedBlocks.emplace(self->filename, iter->second);
|
||||
}
|
||||
g_simulator->corruptedBlocks.erase(begin, end);
|
||||
renameFile(sourceFilename.c_str(), self->filename.c_str());
|
||||
|
||||
machineCache[self->filename] = machineCache[sourceFilename];
|
||||
|
@ -1219,13 +1233,15 @@ public:
|
|||
|
||||
static void runLoop(Sim2* self) {
|
||||
ISimulator::ProcessInfo* callingMachine = self->currentProcess;
|
||||
int lastPrintTime = 0;
|
||||
while (!self->isStopped) {
|
||||
if (self->taskQueue.canSleep()) {
|
||||
double sleepTime = self->taskQueue.getSleepTime(self->time);
|
||||
self->time +=
|
||||
sleepTime + FLOW_KNOBS->MAX_RUNLOOP_SLEEP_DELAY * pow(deterministicRandom()->random01(), 1000.0);
|
||||
if (self->printSimTime) {
|
||||
if (self->printSimTime && (int)self->time > lastPrintTime) {
|
||||
printf("Time: %d\n", (int)self->time);
|
||||
lastPrintTime = (int)self->time;
|
||||
}
|
||||
self->timerTime = std::max(self->timerTime, self->time);
|
||||
}
|
||||
|
@ -2361,7 +2377,7 @@ class UDPSimSocket : public IUDPSocket, ReferenceCounted<UDPSimSocket> {
|
|||
NetworkAddress _localAddress;
|
||||
bool randomDropPacket() {
|
||||
auto res = deterministicRandom()->random01() < .000001;
|
||||
CODE_PROBE(res, "UDP packet drop", probe::context::sim2, probe::assert::simOnly);
|
||||
CODE_PROBE(res, "UDP packet drop", probe::context::sim2, probe::assert::simOnly, probe::decoration::rare);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -2744,6 +2760,22 @@ Future<Void> Sim2FileSystem::deleteFile(const std::string& filename, bool mustBe
|
|||
|
||||
ACTOR Future<Void> renameFileImpl(std::string from, std::string to) {
|
||||
wait(delay(0.5 * deterministicRandom()->random01()));
|
||||
// rename all keys in the corrupted list
|
||||
// first we have to delete all corruption of the destination, since this file will be unlinked if it exists
|
||||
TraceEvent("RenamingFile").detail("From", from).detail("To", to).log();
|
||||
// it seems gcc has some trouble with these types. Aliasing with typename is ugly, but seems to work.
|
||||
using block_value_type = typename decltype(g_simulator->corruptedBlocks)::key_type::second_type;
|
||||
auto maxBlockValue = std::numeric_limits<block_value_type>::max();
|
||||
g_simulator->corruptedBlocks.erase(g_simulator->corruptedBlocks.lower_bound(std::make_pair(to, 0u)),
|
||||
g_simulator->corruptedBlocks.upper_bound(std::make_pair(to, maxBlockValue)));
|
||||
// next we need to rename all files. In practice, the number of corruptions for a given file should be very small
|
||||
auto begin = g_simulator->corruptedBlocks.lower_bound(std::make_pair(from, 0u)),
|
||||
end = g_simulator->corruptedBlocks.upper_bound(std::make_pair(from, maxBlockValue));
|
||||
for (auto iter = begin; iter != end; ++iter) {
|
||||
g_simulator->corruptedBlocks.emplace(to, iter->second);
|
||||
}
|
||||
g_simulator->corruptedBlocks.erase(begin, end);
|
||||
// do the rename
|
||||
::renameFile(from, to);
|
||||
wait(delay(0.5 * deterministicRandom()->random01()));
|
||||
return Void();
|
||||
|
|
|
@ -654,7 +654,7 @@ private:
|
|||
TraceEvent("WriteRecoveryKeySet", dbgid).log();
|
||||
if (!initialCommit)
|
||||
txnStateStore->set(KeyValueRef(m.param1, m.param2));
|
||||
CODE_PROBE(true, "Snapshot created, setting writeRecoveryKey in txnStateStore");
|
||||
CODE_PROBE(true, "Snapshot created, setting writeRecoveryKey in txnStateStore", probe::decoration::rare);
|
||||
}
|
||||
|
||||
void checkSetTenantMapPrefix(MutationRef m) {
|
||||
|
|
|
@ -32,6 +32,7 @@ struct ConnectionProviderTestSettings {
|
|||
uint32_t maxFileMemory;
|
||||
uint32_t maxFileSize;
|
||||
uint32_t threads;
|
||||
|
||||
bool uniformProviderChoice;
|
||||
double readWriteSplit;
|
||||
|
||||
|
@ -39,6 +40,7 @@ struct ConnectionProviderTestSettings {
|
|||
|
||||
int writeOps;
|
||||
int readOps;
|
||||
uint32_t targetBytesPerSec;
|
||||
|
||||
ConnectionProviderTestSettings() {
|
||||
numProviders = deterministicRandom()->randomSkewedUInt32(1, 1000);
|
||||
|
@ -56,6 +58,8 @@ struct ConnectionProviderTestSettings {
|
|||
|
||||
writeOps = 0;
|
||||
readOps = 0;
|
||||
|
||||
targetBytesPerSec = 100 * 1024 * 1024;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -68,7 +72,7 @@ struct ProviderTestData {
|
|||
explicit ProviderTestData(Reference<BlobConnectionProvider> provider) : provider(provider) {}
|
||||
};
|
||||
|
||||
ACTOR Future<Void> createObject(ConnectionProviderTestSettings* settings, ProviderTestData* provider) {
|
||||
ACTOR Future<int64_t> createObject(ConnectionProviderTestSettings* settings, ProviderTestData* provider) {
|
||||
// pick object name before wait so no collisions between concurrent writes
|
||||
std::string objName;
|
||||
loop {
|
||||
|
@ -98,10 +102,10 @@ ACTOR Future<Void> createObject(ConnectionProviderTestSettings* settings, Provid
|
|||
// after write, put in the readable list
|
||||
provider->data.push_back({ fullPath, data });
|
||||
|
||||
return Void();
|
||||
return data.size();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> readAndVerifyObject(ProviderTestData* provider, std::string objFullPath, Value expectedData) {
|
||||
ACTOR Future<int64_t> readAndVerifyObject(ProviderTestData* provider, std::string objFullPath, Value expectedData) {
|
||||
Reference<BackupContainerFileSystem> bstore = provider->provider->getForRead(objFullPath);
|
||||
state Reference<IAsyncFile> reader = wait(bstore->readFile(objFullPath));
|
||||
|
||||
|
@ -110,7 +114,7 @@ ACTOR Future<Void> readAndVerifyObject(ProviderTestData* provider, std::string o
|
|||
ASSERT_EQ(expectedData.size(), readSize);
|
||||
ASSERT(expectedData == actualData);
|
||||
|
||||
return Void();
|
||||
return expectedData.size();
|
||||
}
|
||||
|
||||
Future<Void> deleteObject(ProviderTestData* provider, std::string objFullPath) {
|
||||
|
@ -119,6 +123,10 @@ Future<Void> deleteObject(ProviderTestData* provider, std::string objFullPath) {
|
|||
}
|
||||
|
||||
ACTOR Future<Void> workerThread(ConnectionProviderTestSettings* settings, std::vector<ProviderTestData>* providers) {
|
||||
// This worker should average settings->targetBytesPerSec / settings->threads.
|
||||
// Then because we randomly 50% of the time don't consult the rateLimiter, bring the rate limiter's rate down by 2
|
||||
state int targetBytesPerSec = std::max((uint32_t)1, settings->targetBytesPerSec / settings->threads / 2);
|
||||
state Reference<IRateControl> rateLimiter = Reference<IRateControl>(new SpeedLimit(targetBytesPerSec, 1));
|
||||
state double endTime = now() + settings->runtime;
|
||||
try {
|
||||
while (now() < endTime) {
|
||||
|
@ -133,19 +141,24 @@ ACTOR Future<Void> workerThread(ConnectionProviderTestSettings* settings, std::v
|
|||
|
||||
// randomly pick create or read
|
||||
bool doWrite = deterministicRandom()->random01() < settings->readWriteSplit;
|
||||
state int64_t opSize = 0;
|
||||
if (provider->usedNames.size() < settings->filesPerProvider && (provider->data.empty() || doWrite)) {
|
||||
// create an object
|
||||
wait(createObject(settings, provider));
|
||||
wait(store(opSize, createObject(settings, provider)));
|
||||
settings->writeOps++;
|
||||
} else if (!provider->data.empty()) {
|
||||
// read a random object
|
||||
auto& readInfo = provider->data[deterministicRandom()->randomInt(0, provider->data.size())];
|
||||
wait(readAndVerifyObject(provider, readInfo.first, readInfo.second));
|
||||
wait(store(opSize, readAndVerifyObject(provider, readInfo.first, readInfo.second)));
|
||||
settings->readOps++;
|
||||
} else {
|
||||
// other threads are creating files up to filesPerProvider limit, but none finished yet. Just wait
|
||||
wait(delay(0.1));
|
||||
}
|
||||
|
||||
if (opSize > 0 && deterministicRandom()->coinflip()) {
|
||||
wait(rateLimiter->getAllowance(opSize) && delayJittered(0.01));
|
||||
}
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
@ -161,7 +174,7 @@ ACTOR Future<Void> checkAndCleanUp(ProviderTestData* provider) {
|
|||
|
||||
for (i = 0; i < provider->data.size(); i++) {
|
||||
auto& readInfo = provider->data[i];
|
||||
wait(readAndVerifyObject(provider, readInfo.first, readInfo.second));
|
||||
wait(success(readAndVerifyObject(provider, readInfo.first, readInfo.second)));
|
||||
wait(deleteObject(provider, provider->data[i].first));
|
||||
}
|
||||
|
||||
|
|
|
@ -441,7 +441,7 @@ struct BlobManagerData : NonCopyable, ReferenceCounted<BlobManagerData> {
|
|||
// if this granule is not an active granule, it can't be merged
|
||||
auto gIt = workerAssignments.rangeContaining(range.begin);
|
||||
if (gIt->begin() != range.begin || gIt->end() != range.end) {
|
||||
CODE_PROBE(true, "non-active granule reported merge eligible, ignoring");
|
||||
CODE_PROBE(true, "non-active granule reported merge eligible, ignoring", probe::decoration::rare);
|
||||
if (BM_DEBUG) {
|
||||
fmt::print(
|
||||
"BM {0} Ignoring Merge Candidate [{1} - {2}): range mismatch with active granule [{3} - {4})\n",
|
||||
|
@ -1035,7 +1035,7 @@ static bool handleRangeIsAssign(Reference<BlobManagerData> bmData, RangeAssignme
|
|||
if (assignment.assign.get().type == AssignRequestType::Continue) {
|
||||
ASSERT(assignment.worker.present());
|
||||
if (i.range() != assignment.keyRange || i.cvalue() != assignment.worker.get()) {
|
||||
CODE_PROBE(true, "BM assignment out of date");
|
||||
CODE_PROBE(true, "BM assignment out of date", probe::decoration::rare);
|
||||
if (BM_DEBUG) {
|
||||
fmt::print("Out of date re-assign for ({0}, {1}). Assignment must have changed while "
|
||||
"checking split.\n Reassign: [{2} - {3}): {4}\n Existing: [{5} - {6}): {7}\n",
|
||||
|
@ -1602,10 +1602,10 @@ ACTOR Future<Void> reevaluateInitialSplit(Reference<BlobManagerData> bmData,
|
|||
if (retried && prevOwnerEpoch == bmData->epoch && prevGranuleID == granuleID &&
|
||||
prevOwnerSeqno == std::numeric_limits<int64_t>::max()) {
|
||||
// owner didn't change, last iteration of this transaction just succeeded but threw an error.
|
||||
CODE_PROBE(true, "split too big adjustment succeeded after retry");
|
||||
CODE_PROBE(true, "split too big adjustment succeeded after retry", probe::decoration::rare);
|
||||
break;
|
||||
}
|
||||
CODE_PROBE(true, "split too big was since moved to another worker");
|
||||
CODE_PROBE(true, "split too big was since moved to another worker", probe::decoration::rare);
|
||||
if (BM_DEBUG) {
|
||||
fmt::print("BM {0} re-evaluating initial split [{1} - {2}) too big: moved to another worker\n",
|
||||
bmData->epoch,
|
||||
|
@ -1839,7 +1839,7 @@ ACTOR Future<Void> maybeSplitRange(Reference<BlobManagerData> bmData,
|
|||
wait(checkManagerLock(tr, bmData));
|
||||
ForcedPurgeState purgeState = wait(getForcePurgedState(&tr->getTransaction(), granuleRange));
|
||||
if (purgeState != ForcedPurgeState::NonePurged) {
|
||||
CODE_PROBE(true, "Split stopped because of force purge");
|
||||
CODE_PROBE(true, "Split stopped because of force purge", probe::decoration::rare);
|
||||
TraceEvent("GranuleSplitCancelledForcePurge", bmData->id)
|
||||
.detail("Epoch", bmData->epoch)
|
||||
.detail("GranuleRange", granuleRange);
|
||||
|
@ -2635,7 +2635,9 @@ ACTOR Future<Void> attemptMerges(Reference<BlobManagerData> bmData,
|
|||
currentBytes + metrics.bytes > SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES ||
|
||||
currentKeySumBytes >= CLIENT_KNOBS->VALUE_SIZE_LIMIT / 2) {
|
||||
ASSERT(currentBytes <= SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES);
|
||||
CODE_PROBE(currentKeySumBytes >= CLIENT_KNOBS->VALUE_SIZE_LIMIT / 2, "merge early because of key size");
|
||||
CODE_PROBE(currentKeySumBytes >= CLIENT_KNOBS->VALUE_SIZE_LIMIT / 2,
|
||||
"merge early because of key size",
|
||||
probe::decoration::rare);
|
||||
attemptStartMerge(bmData, currentCandidates);
|
||||
currentCandidates.clear();
|
||||
currentBytes = 0;
|
||||
|
@ -3254,7 +3256,7 @@ static void addAssignment(KeyRangeMap<std::tuple<UID, int64_t, int64_t>>& map,
|
|||
if (oldEpoch > newEpoch || (oldEpoch == newEpoch && oldSeqno > newSeqno)) {
|
||||
newer.push_back(std::pair(old.range(), std::tuple(oldWorker, oldEpoch, oldSeqno)));
|
||||
if (old.range() != newRange) {
|
||||
CODE_PROBE(true, "BM Recovery: BWs disagree on range boundaries");
|
||||
CODE_PROBE(true, "BM Recovery: BWs disagree on range boundaries", probe::decoration::rare);
|
||||
anyConflicts = true;
|
||||
}
|
||||
} else {
|
||||
|
@ -3288,7 +3290,8 @@ static void addAssignment(KeyRangeMap<std::tuple<UID, int64_t, int64_t>>& map,
|
|||
std::get<0>(old.value()) = UID();
|
||||
}
|
||||
if (outOfDate.empty() || outOfDate.back() != std::pair(oldWorker, KeyRange(old.range()))) {
|
||||
CODE_PROBE(true, "BM Recovery: Two workers claim ownership of same granule");
|
||||
CODE_PROBE(
|
||||
true, "BM Recovery: Two workers claim ownership of same granule", probe::decoration::rare);
|
||||
outOfDate.push_back(std::pair(oldWorker, old.range()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbclient/BlobGranuleCommon.h"
|
||||
#include "fdbclient/ClientBooleanParams.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/FastRef.h"
|
||||
#include "flow/Trace.h"
|
||||
|
@ -137,10 +138,23 @@ private:
|
|||
blobRangeKeys // Key ranges managed by blob
|
||||
};
|
||||
for (auto range : ranges) {
|
||||
// todo use getRangeStream for better performance
|
||||
RangeResult result = wait(tr.getRange(range, GetRangeLimits::BYTE_LIMIT_UNLIMITED));
|
||||
for (auto& row : result) {
|
||||
rows.push_back_deep(rows.arena(), KeyValueRef(row.key, row.value));
|
||||
state GetRangeLimits limits(SERVER_KNOBS->BLOB_MANIFEST_RW_ROWS);
|
||||
limits.minRows = 0;
|
||||
state KeySelectorRef begin = firstGreaterOrEqual(range.begin);
|
||||
state KeySelectorRef end = firstGreaterOrEqual(range.end);
|
||||
loop {
|
||||
RangeResult result = wait(tr.getRange(begin, end, limits, Snapshot::True));
|
||||
for (auto& row : result) {
|
||||
rows.push_back_deep(rows.arena(), KeyValueRef(row.key, row.value));
|
||||
}
|
||||
if (!result.more) {
|
||||
break;
|
||||
}
|
||||
if (result.readThrough.present()) {
|
||||
begin = firstGreaterOrEqual(result.readThrough.get());
|
||||
} else {
|
||||
begin = firstGreaterThan(result.end()[-1].key);
|
||||
}
|
||||
}
|
||||
}
|
||||
return rows;
|
||||
|
@ -152,6 +166,13 @@ private:
|
|||
|
||||
// Write data to blob manifest file
|
||||
ACTOR static Future<Void> writeToFile(Reference<BlobManifestDumper> self, Value data) {
|
||||
static int32_t lastWrittenBytes = 0;
|
||||
if (data.size() == lastWrittenBytes) {
|
||||
dprint("Skip writting blob manifest with same size {}\n", lastWrittenBytes);
|
||||
return Void();
|
||||
}
|
||||
lastWrittenBytes = data.size();
|
||||
|
||||
state Reference<BackupContainerFileSystem> writer;
|
||||
state std::string fullPath;
|
||||
|
||||
|
@ -212,7 +233,7 @@ public:
|
|||
ACTOR static Future<Void> execute(Reference<BlobManifestLoader> self) {
|
||||
try {
|
||||
Value data = wait(readFromFile(self));
|
||||
Standalone<BlobManifest> manifest = decode(data);
|
||||
state Standalone<BlobManifest> manifest = decode(data);
|
||||
wait(writeSystemKeys(self, manifest.rows));
|
||||
BlobGranuleRestoreVersionVector _ = wait(listGranules(self));
|
||||
} catch (Error& e) {
|
||||
|
@ -231,13 +252,32 @@ public:
|
|||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
|
||||
try {
|
||||
std::vector<KeyRangeRef> granules;
|
||||
state Standalone<VectorRef<KeyRef>> blobRanges;
|
||||
// Read all granules
|
||||
state GetRangeLimits limits(SERVER_KNOBS->BLOB_MANIFEST_RW_ROWS);
|
||||
limits.minRows = 0;
|
||||
state KeySelectorRef begin = firstGreaterOrEqual(blobGranuleMappingKeys.begin);
|
||||
state KeySelectorRef end = firstGreaterOrEqual(blobGranuleMappingKeys.end);
|
||||
loop {
|
||||
RangeResult rows = wait(tr.getRange(begin, end, limits, Snapshot::True));
|
||||
for (auto& row : rows) {
|
||||
blobRanges.push_back_deep(blobRanges.arena(), row.key);
|
||||
}
|
||||
if (!rows.more) {
|
||||
break;
|
||||
}
|
||||
if (rows.readThrough.present()) {
|
||||
begin = firstGreaterOrEqual(rows.readThrough.get());
|
||||
} else {
|
||||
begin = firstGreaterThan(rows.end()[-1].key);
|
||||
}
|
||||
}
|
||||
|
||||
// check each granule range
|
||||
state int i = 0;
|
||||
auto limit = GetRangeLimits::BYTE_LIMIT_UNLIMITED;
|
||||
state RangeResult blobRanges = wait(tr.getRange(blobGranuleMappingKeys, limit));
|
||||
for (i = 0; i < blobRanges.size() - 1; i++) {
|
||||
Key startKey = blobRanges[i].key.removePrefix(blobGranuleMappingKeys.begin);
|
||||
Key endKey = blobRanges[i + 1].key.removePrefix(blobGranuleMappingKeys.begin);
|
||||
Key startKey = blobRanges[i].removePrefix(blobGranuleMappingKeys.begin);
|
||||
Key endKey = blobRanges[i + 1].removePrefix(blobGranuleMappingKeys.begin);
|
||||
state KeyRange granuleRange = KeyRangeRef(startKey, endKey);
|
||||
try {
|
||||
Standalone<BlobGranuleRestoreVersion> granule = wait(getGranule(&tr, granuleRange));
|
||||
|
@ -300,17 +340,32 @@ private:
|
|||
|
||||
// Write system keys to database
|
||||
ACTOR static Future<Void> writeSystemKeys(Reference<BlobManifestLoader> self, VectorRef<KeyValueRef> rows) {
|
||||
state int start = 0;
|
||||
state int end = 0;
|
||||
for (start = 0; start < rows.size(); start = end) {
|
||||
end = std::min(start + SERVER_KNOBS->BLOB_MANIFEST_RW_ROWS, rows.size());
|
||||
wait(writeSystemKeys(self, rows, start, end));
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Write system keys from start index to end(exclusive), so that we don't exceed the limit of transaction limit
|
||||
ACTOR static Future<Void> writeSystemKeys(Reference<BlobManifestLoader> self,
|
||||
VectorRef<KeyValueRef> rows,
|
||||
int start,
|
||||
int end) {
|
||||
state Transaction tr(self->db_);
|
||||
loop {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
try {
|
||||
for (auto& row : rows) {
|
||||
tr.set(row.key, row.value);
|
||||
for (int i = start; i < end; ++i) {
|
||||
tr.set(rows[i].key, rows[i].value);
|
||||
}
|
||||
wait(tr.commit());
|
||||
dprint("Blob manifest loaded {} rows\n", rows.size());
|
||||
dprint("Blob manifest loaded rows from {} to {}\n", start, end);
|
||||
TraceEvent("BlobManifestLoader").detail("RowStart", start).detail("RowEnd", end);
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
|
@ -324,8 +379,7 @@ private:
|
|||
KeyRange historyKeyRange = blobGranuleHistoryKeyRangeFor(range);
|
||||
// reverse lookup so that the first row is the newest version
|
||||
state RangeResult results =
|
||||
wait(tr->getRange(historyKeyRange, GetRangeLimits::BYTE_LIMIT_UNLIMITED, Snapshot::False, Reverse::True));
|
||||
|
||||
wait(tr->getRange(historyKeyRange, GetRangeLimits::BYTE_LIMIT_UNLIMITED, Snapshot::True, Reverse::True));
|
||||
for (KeyValueRef row : results) {
|
||||
state KeyRange keyRange;
|
||||
state Version version;
|
||||
|
@ -367,24 +421,39 @@ private:
|
|||
|
||||
// List all files for given granule
|
||||
ACTOR static Future<std::vector<GranuleFileVersion>> listGranuleFiles(Transaction* tr, UID granuleID) {
|
||||
state std::vector<GranuleFileVersion> files;
|
||||
|
||||
state KeyRange fileKeyRange = blobGranuleFileKeyRangeFor(granuleID);
|
||||
RangeResult results = wait(tr->getRange(fileKeyRange, GetRangeLimits::BYTE_LIMIT_UNLIMITED));
|
||||
state GetRangeLimits limits(SERVER_KNOBS->BLOB_MANIFEST_RW_ROWS);
|
||||
limits.minRows = 0;
|
||||
state KeySelectorRef begin = firstGreaterOrEqual(fileKeyRange.begin);
|
||||
state KeySelectorRef end = firstGreaterOrEqual(fileKeyRange.end);
|
||||
loop {
|
||||
RangeResult results = wait(tr->getRange(begin, end, limits, Snapshot::True));
|
||||
for (auto& row : results) {
|
||||
UID gid;
|
||||
Version version;
|
||||
uint8_t fileType;
|
||||
Standalone<StringRef> filename;
|
||||
int64_t offset;
|
||||
int64_t length;
|
||||
int64_t fullFileLength;
|
||||
Optional<BlobGranuleCipherKeysMeta> cipherKeysMeta;
|
||||
|
||||
std::vector<GranuleFileVersion> files;
|
||||
for (auto& row : results) {
|
||||
UID gid;
|
||||
Version version;
|
||||
uint8_t fileType;
|
||||
Standalone<StringRef> filename;
|
||||
int64_t offset;
|
||||
int64_t length;
|
||||
int64_t fullFileLength;
|
||||
Optional<BlobGranuleCipherKeysMeta> cipherKeysMeta;
|
||||
|
||||
std::tie(gid, version, fileType) = decodeBlobGranuleFileKey(row.key);
|
||||
std::tie(filename, offset, length, fullFileLength, cipherKeysMeta) = decodeBlobGranuleFileValue(row.value);
|
||||
GranuleFileVersion vs = { version, fileType, filename.toString(), length };
|
||||
files.push_back(vs);
|
||||
std::tie(gid, version, fileType) = decodeBlobGranuleFileKey(row.key);
|
||||
std::tie(filename, offset, length, fullFileLength, cipherKeysMeta) =
|
||||
decodeBlobGranuleFileValue(row.value);
|
||||
GranuleFileVersion vs = { version, fileType, filename.toString(), length };
|
||||
files.push_back(vs);
|
||||
}
|
||||
if (!results.more) {
|
||||
break;
|
||||
}
|
||||
if (results.readThrough.present()) {
|
||||
begin = firstGreaterOrEqual(results.readThrough.get());
|
||||
} else {
|
||||
begin = firstGreaterThan(results.end()[-1].key);
|
||||
}
|
||||
}
|
||||
return files;
|
||||
}
|
||||
|
@ -466,12 +535,26 @@ ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef keys) {
|
|||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
try {
|
||||
RangeResult ranges = wait(tr.getRange(blobRestoreCommandKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
for (auto& r : ranges) {
|
||||
KeyRange keyRange = decodeBlobRestoreCommandKeyFor(r.key);
|
||||
if (keyRange.contains(keys)) {
|
||||
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(r.value);
|
||||
return status.progress < 100; // progress is less than 100
|
||||
state GetRangeLimits limits(SERVER_KNOBS->BLOB_MANIFEST_RW_ROWS);
|
||||
limits.minRows = 0;
|
||||
state KeySelectorRef begin = firstGreaterOrEqual(blobRestoreCommandKeys.begin);
|
||||
state KeySelectorRef end = firstGreaterOrEqual(blobRestoreCommandKeys.end);
|
||||
loop {
|
||||
RangeResult ranges = wait(tr.getRange(begin, end, limits, Snapshot::True));
|
||||
for (auto& r : ranges) {
|
||||
KeyRange keyRange = decodeBlobRestoreCommandKeyFor(r.key);
|
||||
if (keyRange.contains(keys)) {
|
||||
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(r.value);
|
||||
return status.progress < 100; // progress is less than 100
|
||||
}
|
||||
}
|
||||
if (!ranges.more) {
|
||||
break;
|
||||
}
|
||||
if (ranges.readThrough.present()) {
|
||||
begin = firstGreaterOrEqual(ranges.readThrough.get());
|
||||
} else {
|
||||
begin = firstGreaterThan(ranges.end()[-1].key);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -157,7 +157,7 @@ struct GranuleMetadata : NonCopyable, ReferenceCounted<GranuleMetadata> {
|
|||
return (1.0 * readStats.deltaBytesRead) / (writeAmp * SERVER_KNOBS->BG_RDC_READ_FACTOR);
|
||||
}
|
||||
|
||||
bool isEligibleRDC() {
|
||||
bool isEligibleRDC() const {
|
||||
// granule should be reasonably read-hot to be eligible
|
||||
int64_t bytesWritten = bufferedDeltaBytes + bytesInNewDeltaFiles;
|
||||
return bytesWritten * SERVER_KNOBS->BG_RDC_READ_FACTOR < readStats.deltaBytesRead;
|
||||
|
@ -2173,13 +2173,16 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
// will get an exception if we try to read any popped data, killing this actor
|
||||
readOldChangeFeed = true;
|
||||
|
||||
// because several feeds will be reading the same version range of this change feed at the same time, set
|
||||
// cache result to true
|
||||
oldChangeFeedFuture = bwData->db->getChangeFeedStream(cfData,
|
||||
oldCFKey.get(),
|
||||
startVersion + 1,
|
||||
startState.changeFeedStartVersion,
|
||||
metadata->keyRange,
|
||||
bwData->changeFeedStreamReplyBufferSize,
|
||||
false);
|
||||
false,
|
||||
{ ReadType::NORMAL, CacheResult::True });
|
||||
|
||||
} else {
|
||||
readOldChangeFeed = false;
|
||||
|
@ -2283,7 +2286,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
// popped up to V+1 is ok. Or in other words, if the last delta @ V, we only missed data
|
||||
// at V+1 onward if popVersion >= V+2
|
||||
if (metadata->bufferedDeltaVersion < metadata->activeCFData.get()->popVersion - 1) {
|
||||
CODE_PROBE(true, "Blob Worker detected popped");
|
||||
CODE_PROBE(true, "Blob Worker detected popped", probe::decoration::rare);
|
||||
TraceEvent("BlobWorkerChangeFeedPopped", bwData->id)
|
||||
.detail("Granule", metadata->keyRange)
|
||||
.detail("GranuleID", startState.granuleID)
|
||||
|
@ -2462,6 +2465,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
if (readOldChangeFeed) {
|
||||
ASSERT(cfRollbackVersion + 1 < startState.changeFeedStartVersion);
|
||||
ASSERT(oldCFKey.present());
|
||||
// because several feeds will be reading the same version range of this change
|
||||
// feed at the same time, set cache result to true
|
||||
oldChangeFeedFuture =
|
||||
bwData->db->getChangeFeedStream(cfData,
|
||||
oldCFKey.get(),
|
||||
|
@ -2469,7 +2474,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
startState.changeFeedStartVersion,
|
||||
metadata->keyRange,
|
||||
bwData->changeFeedStreamReplyBufferSize,
|
||||
false);
|
||||
false,
|
||||
{ ReadType::NORMAL, CacheResult::True });
|
||||
|
||||
} else {
|
||||
if (cfRollbackVersion + 1 < startState.changeFeedStartVersion) {
|
||||
|
@ -3987,7 +3993,7 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
|
|||
|
||||
ForcedPurgeState purgeState = wait(fForcedPurgeState);
|
||||
if (purgeState != ForcedPurgeState::NonePurged) {
|
||||
CODE_PROBE(true, "Worker trying to open force purged granule");
|
||||
CODE_PROBE(true, "Worker trying to open force purged granule", probe::decoration::rare);
|
||||
if (BW_DEBUG) {
|
||||
fmt::print("Granule [{0} - {1}) is force purged on BW {2}, abandoning\n",
|
||||
req.keyRange.begin.printable(),
|
||||
|
|
|
@ -414,7 +414,7 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
|
|||
}
|
||||
|
||||
Optional<TenantNameRef> const& tenantName = req.tenantInfo.name;
|
||||
if (SERVER_KNOBS->STORAGE_QUOTA_ENABLED && tenantName.present() &&
|
||||
if (SERVER_KNOBS->STORAGE_QUOTA_ENABLED && !req.bypassStorageQuota() && tenantName.present() &&
|
||||
commitData->tenantsOverStorageQuota.count(tenantName.get()) > 0) {
|
||||
req.reply.sendError(storage_quota_exceeded());
|
||||
continue;
|
||||
|
@ -1310,7 +1310,7 @@ ACTOR Future<WriteMutationRefVar> writeMutationFetchEncryptKey(CommitBatchContex
|
|||
wait(getLatestEncryptCipherKey(self->pProxyCommitData->db, domainId, p.first, BlobCipherMetrics::TLOG));
|
||||
self->cipherKeys[domainId] = cipherKey;
|
||||
|
||||
CODE_PROBE(true, "Raw access mutation encryption");
|
||||
CODE_PROBE(true, "Raw access mutation encryption", probe::decoration::rare);
|
||||
ASSERT_NE(domainId, INVALID_ENCRYPT_DOMAIN_ID);
|
||||
encryptedMutation = mutation->encrypt(self->cipherKeys, domainId, *arena, BlobCipherMetrics::TLOG);
|
||||
self->toCommit.writeTypedMessage(encryptedMutation);
|
||||
|
@ -1436,11 +1436,13 @@ ACTOR Future<Void> assignMutationsToStorageServers(CommitBatchContext* self) {
|
|||
double prob = mul * cost / totalCosts;
|
||||
|
||||
if (deterministicRandom()->random01() < prob) {
|
||||
for (const auto& ssInfo : pProxyCommitData->keyInfo[m.param1].src_info) {
|
||||
const auto& storageServers = pProxyCommitData->keyInfo[m.param1].src_info;
|
||||
for (const auto& ssInfo : storageServers) {
|
||||
auto id = ssInfo->interf.id();
|
||||
// scale cost
|
||||
cost = cost < CLIENT_KNOBS->COMMIT_SAMPLE_COST ? CLIENT_KNOBS->COMMIT_SAMPLE_COST : cost;
|
||||
pProxyCommitData->updateSSTagCost(id, trs[self->transactionNum].tagSet.get(), m, cost);
|
||||
pProxyCommitData->updateSSTagCost(
|
||||
id, trs[self->transactionNum].tagSet.get(), m, cost / storageServers.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -316,7 +316,7 @@ class ConfigNodeImpl {
|
|||
ACTOR static Future<Void> getConfigClasses(ConfigNodeImpl* self, ConfigTransactionGetConfigClassesRequest req) {
|
||||
state Optional<CoordinatorsHash> locked = wait(getLocked(self));
|
||||
if (locked.present()) {
|
||||
CODE_PROBE(true, "attempting to read config classes from locked ConfigNode");
|
||||
CODE_PROBE(true, "attempting to read config classes from locked ConfigNode", probe::decoration::rare);
|
||||
req.reply.sendError(coordinators_changed());
|
||||
return Void();
|
||||
}
|
||||
|
@ -360,7 +360,7 @@ class ConfigNodeImpl {
|
|||
ACTOR static Future<Void> getKnobs(ConfigNodeImpl* self, ConfigTransactionGetKnobsRequest req) {
|
||||
state Optional<CoordinatorsHash> locked = wait(getLocked(self));
|
||||
if (locked.present()) {
|
||||
CODE_PROBE(true, "attempting to read knobs from locked ConfigNode");
|
||||
CODE_PROBE(true, "attempting to read knobs from locked ConfigNode", probe::decoration::rare);
|
||||
req.reply.sendError(coordinators_changed());
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -531,7 +531,7 @@ struct LeaderRegisterCollection {
|
|||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> onError() { return actors.getResult(); }
|
||||
Future<Void> onError() const { return actors.getResult(); }
|
||||
|
||||
// Check if this coordinator is no longer the leader, and the new one was stored in the "forward" keyspace.
|
||||
// If the "forward" keyspace was set some time ago (as configured by knob), log an error to indicate the client is
|
||||
|
|
|
@ -697,6 +697,9 @@ struct DDQueue : public IDDRelocationQueue {
|
|||
RemoteTeamIsFull,
|
||||
RemoteTeamIsNotHealthy,
|
||||
NoAvailablePhysicalShard,
|
||||
UnknownForceNew,
|
||||
NoAnyHealthy,
|
||||
DstOverloaded,
|
||||
NumberOfTypes,
|
||||
};
|
||||
std::vector<int> retryFindDstReasonCount;
|
||||
|
@ -1423,6 +1426,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
state double startTime = now();
|
||||
state std::vector<UID> destIds;
|
||||
state uint64_t debugID = deterministicRandom()->randomUInt64();
|
||||
state bool enableShardMove = SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD;
|
||||
|
||||
try {
|
||||
if (now() - self->lastInterval < 1.0) {
|
||||
|
@ -1539,8 +1543,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
req.src = rd.src;
|
||||
req.completeSources = rd.completeSources;
|
||||
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
|
||||
tciIndex == 1) {
|
||||
if (enableShardMove && tciIndex == 1) {
|
||||
ASSERT(physicalShardIDCandidate != UID().first() &&
|
||||
physicalShardIDCandidate != anonymousShardId.first());
|
||||
Optional<ShardsAffectedByTeamFailure::Team> remoteTeamWithPhysicalShard =
|
||||
|
@ -1587,64 +1590,65 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
anyWithSource = true;
|
||||
}
|
||||
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
||||
// critical to the correctness of team selection by PhysicalShardCollection
|
||||
// tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary
|
||||
// team Thus, tryGetAvailableRemoteTeamWith() may select an almost full remote team In this
|
||||
// case, we must re-select a remote team We set foundTeams = false to avoid finishing team
|
||||
// selection Then, forceToUseNewPhysicalShard is set, which enforce to use getTeam to select
|
||||
// a remote team
|
||||
if (enableShardMove) {
|
||||
if (tciIndex == 1 && !forceToUseNewPhysicalShard) {
|
||||
// Critical to the correctness of team selection by PhysicalShardCollection:
|
||||
// tryGetAvailableRemoteTeamWith() enforces selecting a remote team paired with a primary
|
||||
// team. Thus, tryGetAvailableRemoteTeamWith() may select an almost full remote team. In
|
||||
// this case, we must re-select a remote team. We set foundTeams = false to avoid
|
||||
// finishing team selection. Then, forceToUseNewPhysicalShard is set, which enforces the
|
||||
// use of getTeam to select a remote team.
|
||||
bool minAvailableSpaceRatio = bestTeam.first.get()->getMinAvailableSpaceRatio(true);
|
||||
if (minAvailableSpaceRatio < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsFull;
|
||||
foundTeams = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
||||
// Critical to the correctness of team selection by PhysicalShardCollection:
|
||||
// tryGetAvailableRemoteTeamWith() enforces selecting a remote team paired with a primary
|
||||
// team. Thus, tryGetAvailableRemoteTeamWith() may select an unhealthy remote team. In
|
||||
// this case, we must re-select a remote team. We set foundTeams = false to avoid
|
||||
// finishing team selection. Then, forceToUseNewPhysicalShard is set, which enforces the
|
||||
// use of getTeam to select a remote team.
|
||||
if (!bestTeam.first.get()->isHealthy()) {
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy;
|
||||
foundTeams = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bestTeams.emplace_back(bestTeam.first.get(), true);
|
||||
// Always set bestTeams[i].second = true to disable optimization in data move between DCs
|
||||
// for the correctness of PhysicalShardCollection
|
||||
// Currently, enabling the optimization will break the invariant of PhysicalShardCollection
|
||||
// Invariant: once a physical shard is created with a specific set of SSes, this SS set will
|
||||
// never get changed.
|
||||
|
||||
if (tciIndex == 0) {
|
||||
ASSERT(foundTeams);
|
||||
ShardsAffectedByTeamFailure::Team primaryTeam =
|
||||
ShardsAffectedByTeamFailure::Team(bestTeams[0].first->getServerIDs(), true);
|
||||
if (forceToUseNewPhysicalShard &&
|
||||
retryFindDstReason == DDQueue::RetryFindDstReason::None) {
|
||||
// This is an abnormal state where we try to create a new physical shard, but we
|
||||
// don't know why. This state is to track unknown reason for force creating new
|
||||
// physical shard.
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::UnknownForceNew;
|
||||
}
|
||||
physicalShardIDCandidate =
|
||||
self->physicalShardCollection->determinePhysicalShardIDGivenPrimaryTeam(
|
||||
primaryTeam, metrics, forceToUseNewPhysicalShard, debugID);
|
||||
ASSERT(physicalShardIDCandidate != UID().first() &&
|
||||
physicalShardIDCandidate != anonymousShardId.first());
|
||||
}
|
||||
} else {
|
||||
bestTeams.emplace_back(bestTeam.first.get(), bestTeam.second);
|
||||
}
|
||||
|
||||
// get physicalShardIDCandidate
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
|
||||
tciIndex == 0) {
|
||||
ASSERT(foundTeams);
|
||||
ShardsAffectedByTeamFailure::Team primaryTeam =
|
||||
ShardsAffectedByTeamFailure::Team(bestTeams[0].first->getServerIDs(), true);
|
||||
physicalShardIDCandidate =
|
||||
self->physicalShardCollection->determinePhysicalShardIDGivenPrimaryTeam(
|
||||
primaryTeam, metrics, forceToUseNewPhysicalShard, debugID);
|
||||
ASSERT(physicalShardIDCandidate != UID().first() &&
|
||||
physicalShardIDCandidate != anonymousShardId.first());
|
||||
}
|
||||
}
|
||||
tciIndex++;
|
||||
}
|
||||
|
||||
// critical to the correctness of team selection by PhysicalShardCollection
|
||||
// tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary team
|
||||
// Thus, tryGetAvailableRemoteTeamWith() may select an unhealthy remote team
|
||||
// In this case, we must re-select a remote team
|
||||
// We set foundTeams = false to avoid finishing team selection
|
||||
// Then, forceToUseNewPhysicalShard is set, which enforce to use getTeam to select a remote team
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
|
||||
bestTeams.size() > 1 && !forceToUseNewPhysicalShard) {
|
||||
if (!bestTeams[1].first->isHealthy()) {
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy;
|
||||
foundTeams = false;
|
||||
}
|
||||
}
|
||||
|
||||
// once we've found healthy candidate teams, make sure they're not overloaded with outstanding moves
|
||||
// already
|
||||
anyDestOverloaded = !canLaunchDest(bestTeams, rd.priority, self->destBusymap);
|
||||
|
@ -1654,6 +1658,14 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
break;
|
||||
}
|
||||
|
||||
if (retryFindDstReason == DDQueue::RetryFindDstReason::None && foundTeams) {
|
||||
if (!anyHealthy) {
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::NoAnyHealthy;
|
||||
} else if (anyDestOverloaded) {
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::DstOverloaded;
|
||||
}
|
||||
}
|
||||
|
||||
if (anyDestOverloaded) {
|
||||
CODE_PROBE(true, "Destination overloaded throttled move");
|
||||
destOverloadedCount++;
|
||||
|
@ -1665,7 +1677,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
.detail("AnyDestOverloaded", anyDestOverloaded)
|
||||
.detail("NumOfTeamCollections", self->teamCollections.size())
|
||||
.detail("Servers", destServersString(bestTeams));
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
||||
if (enableShardMove) {
|
||||
if (rd.isRestore() && destOverloadedCount > 50) {
|
||||
throw data_move_dest_team_not_found();
|
||||
}
|
||||
|
@ -1689,14 +1701,14 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
// When forceToUseNewPhysicalShard = false, we get paired primary team and remote team
|
||||
// However, this may be failed
|
||||
// Any retry triggers to use new physicalShard which enters the normal routine
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
||||
if (enableShardMove) {
|
||||
forceToUseNewPhysicalShard = true;
|
||||
}
|
||||
|
||||
// TODO different trace event + knob for overloaded? Could wait on an async var for done moves
|
||||
}
|
||||
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
||||
if (enableShardMove) {
|
||||
if (!rd.isRestore()) {
|
||||
// when !rd.isRestore(), dataMoveId is just decided as physicalShardIDCandidate
|
||||
// thus, update the physicalShardIDCandidate to related data structures
|
||||
|
@ -1954,7 +1966,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
self->shardsAffectedByTeamFailure->finishMove(rd.keys);
|
||||
relocationComplete.send(rd);
|
||||
|
||||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
||||
if (enableShardMove) {
|
||||
// update physical shard collection
|
||||
std::vector<ShardsAffectedByTeamFailure::Team> selectedTeams;
|
||||
for (int i = 0; i < bestTeams.size(); i++) {
|
||||
|
@ -2525,6 +2537,12 @@ ACTOR Future<Void> dataDistributionQueue(Reference<IDDTxnProcessor> db,
|
|||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteTeamIsFull])
|
||||
.detail("RemoteTeamIsNotHealthy",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy])
|
||||
.detail("UnknownForceNew",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::UnknownForceNew])
|
||||
.detail("NoAnyHealthy",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAnyHealthy])
|
||||
.detail("DstOverloaded",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::DstOverloaded])
|
||||
.detail(
|
||||
"NoAvailablePhysicalShard",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]);
|
||||
|
|
|
@ -623,7 +623,9 @@ std::vector<RangeToSplit> findTenantShardBoundaries(KeyRangeMap<ShardTrackedData
|
|||
result.emplace_back(shardContainingTenantEnd, faultLines);
|
||||
}
|
||||
} else {
|
||||
CODE_PROBE(true, "Shards that contain tenant key range not split since shard stats are unavailable");
|
||||
CODE_PROBE(true,
|
||||
"Shards that contain tenant key range not split since shard stats are unavailable",
|
||||
probe::decoration::rare);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1358,7 +1360,7 @@ ACTOR Future<Void> fetchTopKShardMetrics(DataDistributionTracker* self, GetTopKM
|
|||
when(wait(g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01) ? Never()
|
||||
: fetchTopKShardMetrics_impl(self, req))) {}
|
||||
when(wait(delay(SERVER_KNOBS->DD_SHARD_METRICS_TIMEOUT))) {
|
||||
CODE_PROBE(true, "TopK DD_SHARD_METRICS_TIMEOUT", probe::decoration::rare);
|
||||
CODE_PROBE(true, "TopK DD_SHARD_METRICS_TIMEOUT");
|
||||
req.reply.send(GetTopKMetricsReply());
|
||||
}
|
||||
}
|
||||
|
@ -2087,4 +2089,4 @@ TEST_CASE("/DataDistributor/Tracker/FetchTopK") {
|
|||
ASSERT(reply.minReadLoad == -1);
|
||||
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1538,14 +1538,18 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
|
|||
when(DistributorSnapRequest snapReq = waitNext(di.distributorSnapReq.getFuture())) {
|
||||
auto& snapUID = snapReq.snapUID;
|
||||
if (ddSnapReqResultMap.count(snapUID)) {
|
||||
CODE_PROBE(true, "Data distributor received a duplicate finished snapshot request");
|
||||
CODE_PROBE(true,
|
||||
"Data distributor received a duplicate finished snapshot request",
|
||||
probe::decoration::rare);
|
||||
auto result = ddSnapReqResultMap[snapUID];
|
||||
result.isError() ? snapReq.reply.sendError(result.getError()) : snapReq.reply.send(result.get());
|
||||
TraceEvent("RetryFinishedDistributorSnapRequest")
|
||||
.detail("SnapUID", snapUID)
|
||||
.detail("Result", result.isError() ? result.getError().code() : 0);
|
||||
} else if (ddSnapReqMap.count(snapReq.snapUID)) {
|
||||
CODE_PROBE(true, "Data distributor received a duplicate ongoing snapshot request");
|
||||
CODE_PROBE(true,
|
||||
"Data distributor received a duplicate ongoing snapshot request",
|
||||
probe::decoration::rare);
|
||||
TraceEvent("RetryOngoingDistributorSnapRequest").detail("SnapUID", snapUID);
|
||||
ASSERT(snapReq.snapPayload == ddSnapReqMap[snapUID].snapPayload);
|
||||
ddSnapReqMap[snapUID] = snapReq;
|
||||
|
|
|
@ -184,7 +184,7 @@ struct BlobMetadataCacheEntry {
|
|||
explicit BlobMetadataCacheEntry(Standalone<BlobMetadataDetailsRef> metadataDetails)
|
||||
: metadataDetails(metadataDetails), creationTimeSec(now()) {}
|
||||
|
||||
bool isValid() { return (now() - creationTimeSec) < SERVER_KNOBS->BLOB_METADATA_CACHE_TTL; }
|
||||
bool isValid() const { return (now() - creationTimeSec) < SERVER_KNOBS->BLOB_METADATA_CACHE_TTL; }
|
||||
};
|
||||
|
||||
// TODO: Bound the size of the cache (implement LRU/LFU...)
|
||||
|
|
|
@ -107,7 +107,7 @@ class GlobalTagThrottlerImpl {
|
|||
if (opType == OpType::READ) {
|
||||
readCost.setTotal(newCost);
|
||||
} else {
|
||||
writeCost.setTotal(CLIENT_KNOBS->GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO * newCost);
|
||||
writeCost.setTotal(newCost);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -226,7 +226,9 @@ class GlobalTagThrottlerImpl {
|
|||
return {};
|
||||
}
|
||||
auto const transactionRate = stats.get().getTransactionRate();
|
||||
if (transactionRate == 0.0) {
|
||||
// If there is less than one transaction per second, we do not have enough data
|
||||
// to accurately compute an average transaction cost.
|
||||
if (transactionRate < 1.0) {
|
||||
return {};
|
||||
} else {
|
||||
return std::max(static_cast<double>(CLIENT_KNOBS->TAG_THROTTLING_PAGE_SIZE), cost.get() / transactionRate);
|
||||
|
@ -475,7 +477,7 @@ public:
|
|||
if (targetTps.present()) {
|
||||
auto const smoothedTargetTps = stats.updateAndGetTargetLimit(targetTps.get());
|
||||
te.detail("SmoothedTargetTps", smoothedTargetTps).detail("NumProxies", numProxies);
|
||||
result[tag] = smoothedTargetTps / numProxies;
|
||||
result[tag] = std::max(1.0, smoothedTargetTps / numProxies);
|
||||
} else {
|
||||
te.disable();
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ bool GrvProxyTagThrottler::TagQueue::isMaxThrottled(double maxThrottleDuration)
|
|||
}
|
||||
|
||||
void GrvProxyTagThrottler::TagQueue::rejectRequests(LatencyBandsMap& latencyBandsMap) {
|
||||
CODE_PROBE(true, "GrvProxyTagThrottler rejecting requests");
|
||||
CODE_PROBE(true, "GrvProxyTagThrottler rejecting requests", probe::decoration::rare);
|
||||
while (!requests.empty()) {
|
||||
auto& delayedReq = requests.front();
|
||||
delayedReq.updateProxyTagThrottledDuration(latencyBandsMap);
|
||||
|
@ -58,6 +58,14 @@ void GrvProxyTagThrottler::TagQueue::rejectRequests(LatencyBandsMap& latencyBand
|
|||
}
|
||||
}
|
||||
|
||||
void GrvProxyTagThrottler::TagQueue::endReleaseWindow(int64_t numStarted, double elapsed) {
|
||||
if (rateInfo.present()) {
|
||||
CODE_PROBE(requests.empty(), "Tag queue ending release window with empty request queue");
|
||||
CODE_PROBE(!requests.empty(), "Tag queue ending release window with requests still queued");
|
||||
rateInfo.get().endReleaseWindow(numStarted, requests.empty(), elapsed);
|
||||
}
|
||||
}
|
||||
|
||||
GrvProxyTagThrottler::GrvProxyTagThrottler(double maxThrottleDuration)
|
||||
: maxThrottleDuration(maxThrottleDuration),
|
||||
latencyBandsMap("GrvProxyTagThrottler",
|
||||
|
@ -202,16 +210,14 @@ void GrvProxyTagThrottler::releaseTransactions(double elapsed,
|
|||
}
|
||||
}
|
||||
|
||||
// End release windows for queues with valid rateInfo
|
||||
// End release windows for all tag queues
|
||||
{
|
||||
TransactionTagMap<uint32_t> transactionsReleasedMap;
|
||||
for (const auto& [tag, count] : transactionsReleased) {
|
||||
transactionsReleasedMap[tag] = count;
|
||||
}
|
||||
for (auto& [tag, queue] : queues) {
|
||||
if (queue.rateInfo.present()) {
|
||||
queue.rateInfo.get().endReleaseWindow(transactionsReleasedMap[tag], false, elapsed);
|
||||
}
|
||||
queue.endReleaseWindow(transactionsReleasedMap[tag], elapsed);
|
||||
}
|
||||
}
|
||||
// If the capacity is increased, that means the vector has been illegally resized, potentially
|
||||
|
@ -438,3 +444,33 @@ TEST_CASE("/GrvProxyTagThrottler/Fifo") {
|
|||
wait(mockFifoClient(&throttler));
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Tests that while throughput is low, the tag throttler
|
||||
// does not accumulate too much budget.
|
||||
//
|
||||
// A server is set up to serve 10 transactions per second,
|
||||
// then runs idly for 60 seconds. Then a client starts
|
||||
// and attempts 20 transactions per second for 60 seconds.
|
||||
// The server throttles the client to only achieve
|
||||
// 10 transactions per second during this 60 second window.
|
||||
// If the throttler is allowed to accumulate budget indefinitely
|
||||
// during the idle 60 seconds, this test will fail.
|
||||
TEST_CASE("/GrvProxyTagThrottler/LimitedIdleBudget") {
|
||||
state GrvProxyTagThrottler throttler(5.0); // constructor argument is maxThrottleDuration
|
||||
state TagSet tagSet;
|
||||
state TransactionTagMap<uint32_t> counters; // per-tag counters filled in by mockClient
|
||||
{
|
||||
TransactionTagMap<double> rates;
|
||||
rates["sampleTag"_sr] = 10.0; // the 10 transactions-per-second limit described above
|
||||
throttler.updateRates(rates);
|
||||
}
|
||||
tagSet.addTag("sampleTag"_sr);
|
||||
|
||||
state Future<Void> server = mockServer(&throttler);
|
||||
wait(delay(60.0)); // idle period: the server runs before any client has been started
|
||||
state Future<Void> client = mockClient(&throttler, TransactionPriority::DEFAULT, tagSet, 1, 20.0, &counters); // 20.0 is presumably the attempted transactions per second -- confirm against mockClient
|
||||
wait(timeout(client && server, 60.0, Void())); // bound the client phase to 60 seconds
|
||||
TraceEvent("TagQuotaTest_LimitedIdleBudget").detail("Counter", counters["sampleTag"_sr]);
|
||||
ASSERT(isNear(counters["sampleTag"_sr], 60.0 * 10.0)); // expect about 60 s * 10/s = 600 for the tag
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ bool GrvTransactionRateInfo::canStart(int64_t numAlreadyStarted, int64_t count)
|
|||
std::min(limit + budget, SERVER_KNOBS->START_TRANSACTION_MAX_TRANSACTIONS_TO_START);
|
||||
}
|
||||
|
||||
void GrvTransactionRateInfo::endReleaseWindow(int64_t numStartedAtPriority, bool queueEmptyAtPriority, double elapsed) {
|
||||
void GrvTransactionRateInfo::endReleaseWindow(int64_t numStarted, bool queueEmpty, double elapsed) {
|
||||
// Update the budget to accumulate any extra capacity available or remove any excess that was used.
|
||||
// The actual delta is the portion of the limit we didn't use multiplied by the fraction of the rate window that
|
||||
// elapsed.
|
||||
|
@ -52,16 +52,15 @@ void GrvTransactionRateInfo::endReleaseWindow(int64_t numStartedAtPriority, bool
|
|||
//
|
||||
// Note that "rate window" here indicates a period of SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW seconds,
|
||||
// whereas "release window" is the period between wait statements, with duration indicated by "elapsed."
|
||||
budget =
|
||||
std::max(0.0, budget + elapsed * (limit - numStartedAtPriority) / SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW);
|
||||
budget = std::max(0.0, budget + elapsed * (limit - numStarted) / SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW);
|
||||
|
||||
// If we are emptying out the queue of requests, then we don't need to carry much budget forward
|
||||
// If we did keep accumulating budget, then our responsiveness to changes in workflow could be compromised
|
||||
if (queueEmptyAtPriority) {
|
||||
if (queueEmpty) {
|
||||
budget = std::min(budget, SERVER_KNOBS->START_TRANSACTION_MAX_EMPTY_QUEUE_BUDGET);
|
||||
}
|
||||
|
||||
smoothReleased.addDelta(numStartedAtPriority);
|
||||
smoothReleased.addDelta(numStarted);
|
||||
}
|
||||
|
||||
void GrvTransactionRateInfo::disable() {
|
||||
|
|
|
@ -740,7 +740,8 @@ private:
|
|||
}
|
||||
|
||||
CODE_PROBE(self->enableEncryption && self->uncommittedBytes() > 0,
|
||||
"KeyValueStoreMemory recovered partial transaction while encryption-at-rest is enabled");
|
||||
"KeyValueStoreMemory recovered partial transaction while encryption-at-rest is enabled",
|
||||
probe::decoration::rare);
|
||||
self->semiCommit();
|
||||
|
||||
return Void();
|
||||
|
|
|
@ -149,7 +149,22 @@ struct PageChecksumCodec {
|
|||
}
|
||||
|
||||
if (!silent) {
|
||||
TraceEvent trEvent(SevError, "SQLitePageChecksumFailure");
|
||||
auto severity = SevError;
|
||||
if (g_network->isSimulated()) {
|
||||
auto firstBlock = pageNumber == 1 ? 0 : ((pageNumber - 1) * pageLen) / 4096,
|
||||
lastBlock = (pageNumber * pageLen) / 4096;
|
||||
auto iter = g_simulator->corruptedBlocks.lower_bound(std::make_pair(filename, firstBlock));
|
||||
if (iter != g_simulator->corruptedBlocks.end() && iter->first == filename && iter->second < lastBlock) {
|
||||
severity = SevWarnAlways;
|
||||
}
|
||||
TraceEvent("CheckCorruption")
|
||||
.detail("Filename", filename)
|
||||
.detail("NextFile", iter->first)
|
||||
.detail("FirstBlock", firstBlock)
|
||||
.detail("LastBlock", lastBlock)
|
||||
.detail("NextBlock", iter->second);
|
||||
}
|
||||
TraceEvent trEvent(severity, "SQLitePageChecksumFailure");
|
||||
trEvent.error(checksum_failed())
|
||||
.detail("CodecPageSize", pageSize)
|
||||
.detail("CodecReserveSize", reserveSize)
|
||||
|
|
|
@ -321,7 +321,7 @@ void LogPushData::writeMessage(StringRef rawMessageWithoutLength, bool usePrevio
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<Standalone<StringRef>> LogPushData::getAllMessages() {
|
||||
std::vector<Standalone<StringRef>> LogPushData::getAllMessages() const {
|
||||
std::vector<Standalone<StringRef>> results;
|
||||
results.reserve(messagesWriter.size());
|
||||
for (int loc = 0; loc < messagesWriter.size(); loc++) {
|
||||
|
|
|
@ -451,7 +451,7 @@ Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
|
|||
return more;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> serverPeekOnFailed(ILogSystem::ServerPeekCursor* self) {
|
||||
ACTOR Future<Void> serverPeekOnFailed(ILogSystem::ServerPeekCursor const* self) {
|
||||
loop {
|
||||
choose {
|
||||
when(wait(self->interf->get().present()
|
||||
|
@ -471,7 +471,7 @@ ACTOR Future<Void> serverPeekOnFailed(ILogSystem::ServerPeekCursor* self) {
|
|||
}
|
||||
}
|
||||
|
||||
Future<Void> ILogSystem::ServerPeekCursor::onFailed() {
|
||||
Future<Void> ILogSystem::ServerPeekCursor::onFailed() const {
|
||||
return serverPeekOnFailed(this);
|
||||
}
|
||||
|
||||
|
@ -757,7 +757,7 @@ Future<Void> ILogSystem::MergedPeekCursor::getMore(TaskPriority taskID) {
|
|||
return more;
|
||||
}
|
||||
|
||||
Future<Void> ILogSystem::MergedPeekCursor::onFailed() {
|
||||
Future<Void> ILogSystem::MergedPeekCursor::onFailed() const {
|
||||
ASSERT(false);
|
||||
return Never();
|
||||
}
|
||||
|
@ -1114,7 +1114,7 @@ Future<Void> ILogSystem::SetPeekCursor::getMore(TaskPriority taskID) {
|
|||
return more;
|
||||
}
|
||||
|
||||
Future<Void> ILogSystem::SetPeekCursor::onFailed() {
|
||||
Future<Void> ILogSystem::SetPeekCursor::onFailed() const {
|
||||
ASSERT(false);
|
||||
return Never();
|
||||
}
|
||||
|
@ -1226,7 +1226,7 @@ Future<Void> ILogSystem::MultiCursor::getMore(TaskPriority taskID) {
|
|||
return cursors.back()->getMore(taskID);
|
||||
}
|
||||
|
||||
Future<Void> ILogSystem::MultiCursor::onFailed() {
|
||||
Future<Void> ILogSystem::MultiCursor::onFailed() const {
|
||||
return cursors.back()->onFailed();
|
||||
}
|
||||
|
||||
|
@ -1503,7 +1503,7 @@ Future<Void> ILogSystem::BufferedCursor::getMore(TaskPriority taskID) {
|
|||
return more;
|
||||
}
|
||||
|
||||
Future<Void> ILogSystem::BufferedCursor::onFailed() {
|
||||
Future<Void> ILogSystem::BufferedCursor::onFailed() const {
|
||||
ASSERT(false);
|
||||
return Never();
|
||||
}
|
||||
|
|
|
@ -131,16 +131,16 @@ void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status
|
|||
auto ranges = serverKeys.intersectingRanges(range);
|
||||
ASSERT(!ranges.empty());
|
||||
if (ranges.begin().range().contains(range)) {
|
||||
CODE_PROBE(true, "Implicitly split single shard to 3 pieces");
|
||||
CODE_PROBE(true, "Implicitly split single shard to 3 pieces", probe::decoration::rare);
|
||||
threeWayShardSplitting(ranges.begin().range(), range, ranges.begin().cvalue().shardSize, restrictSize);
|
||||
return;
|
||||
}
|
||||
if (ranges.begin().begin() < range.begin) {
|
||||
CODE_PROBE(true, "Implicitly split begin range to 2 pieces");
|
||||
CODE_PROBE(true, "Implicitly split begin range to 2 pieces", probe::decoration::rare);
|
||||
twoWayShardSplitting(ranges.begin().range(), range.begin, ranges.begin().cvalue().shardSize, restrictSize);
|
||||
}
|
||||
if (ranges.end().end() > range.end) {
|
||||
CODE_PROBE(true, "Implicitly split end range to 2 pieces");
|
||||
CODE_PROBE(true, "Implicitly split end range to 2 pieces", probe::decoration::rare);
|
||||
twoWayShardSplitting(ranges.end().range(), range.end, ranges.end().cvalue().shardSize, restrictSize);
|
||||
}
|
||||
ranges = serverKeys.containedRanges(range);
|
||||
|
@ -156,7 +156,7 @@ void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status
|
|||
if (isStatusTransitionValid(oldStatus, status)) {
|
||||
it.value() = ShardInfo{ status, newSize };
|
||||
} else if (oldStatus == MockShardStatus::COMPLETED && status == MockShardStatus::INFLIGHT) {
|
||||
CODE_PROBE(true, "Shard already on server");
|
||||
CODE_PROBE(true, "Shard already on server", probe::decoration::rare);
|
||||
} else {
|
||||
TraceEvent(SevError, "MockShardStatusTransitionError")
|
||||
.detail("From", oldStatus)
|
||||
|
@ -382,7 +382,7 @@ Future<std::vector<KeyRangeLocationInfo>> MockGlobalState::getKeyRangeLocations(
|
|||
ASSERT_EQ(srcTeam.size(), 1);
|
||||
rep.results.emplace_back(it->range(), extractStorageServerInterfaces(srcTeam.front().servers));
|
||||
}
|
||||
CODE_PROBE(it != ranges.end(), "getKeyRangeLocations is limited", probe::decoration::rare);
|
||||
CODE_PROBE(it != ranges.end(), "getKeyRangeLocations is limited");
|
||||
|
||||
std::vector<KeyRangeLocationInfo> results;
|
||||
for (int shard = 0; shard < rep.results.size(); shard++) {
|
||||
|
|
|
@ -802,11 +802,13 @@ ACTOR Future<Void> waitForShardReady(StorageServerInterface server,
|
|||
try {
|
||||
GetShardStateReply rep =
|
||||
wait(server.getShardState.getReply(GetShardStateRequest(keys, mode), TaskPriority::MoveKeys));
|
||||
TraceEvent("GetShardStateReadyDD").detail("RepVersion", rep.first).detail("MinVersion", rep.second).log();
|
||||
if (rep.first >= minVersion) {
|
||||
return Void();
|
||||
}
|
||||
wait(delayJittered(SERVER_KNOBS->SHARD_READY_DELAY, TaskPriority::MoveKeys));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("GetShardStateReadyError").error(e).log();
|
||||
if (e.code() != error_code_timed_out) {
|
||||
if (e.code() != error_code_broken_promise)
|
||||
throw e;
|
||||
|
@ -1699,7 +1701,9 @@ ACTOR static Future<Void> finishMoveShards(Database occ,
|
|||
state std::vector<UID> newDestinations;
|
||||
std::set<UID> completeSrcSet(completeSrc.begin(), completeSrc.end());
|
||||
for (const UID& id : destServers) {
|
||||
newDestinations.push_back(id);
|
||||
if (!hasRemote || !completeSrcSet.count(id)) {
|
||||
newDestinations.push_back(id);
|
||||
}
|
||||
}
|
||||
|
||||
state std::vector<StorageServerInterface> storageServerInterfaces;
|
||||
|
@ -1743,7 +1747,8 @@ ACTOR static Future<Void> finishMoveShards(Database occ,
|
|||
|
||||
TraceEvent(SevVerbose, "FinishMoveShardsWaitedServers", relocationIntervalId)
|
||||
.detail("DataMoveID", dataMoveId)
|
||||
.detail("ReadyServers", describe(readyServers));
|
||||
.detail("ReadyServers", describe(readyServers))
|
||||
.detail("NewDestinations", describe(newDestinations));
|
||||
|
||||
if (readyServers.size() == newDestinations.size()) {
|
||||
|
||||
|
|
|
@ -196,7 +196,7 @@ private:
|
|||
|
||||
Standalone<StringRef> e = wait(self->queue->readNext(payloadSize + 1));
|
||||
if (e.size() != payloadSize + 1) {
|
||||
CODE_PROBE(true, "Zero fill within payload");
|
||||
CODE_PROBE(true, "Zero fill within payload", probe::decoration::rare);
|
||||
zeroFillSize = payloadSize + 1 - e.size();
|
||||
break;
|
||||
}
|
||||
|
@ -210,7 +210,7 @@ private:
|
|||
}
|
||||
}
|
||||
if (zeroFillSize) {
|
||||
CODE_PROBE(true, "Fixing a partial commit at the end of the tlog queue");
|
||||
CODE_PROBE(true, "Fixing a partial commit at the end of the tlog queue", probe::decoration::rare);
|
||||
for (int i = 0; i < zeroFillSize; i++)
|
||||
self->queue->push(StringRef((const uint8_t*)"", 1));
|
||||
}
|
||||
|
|
|
@ -170,7 +170,7 @@ private:
|
|||
|
||||
Standalone<StringRef> e = wait(self->queue->readNext(payloadSize + 1));
|
||||
if (e.size() != payloadSize + 1) {
|
||||
CODE_PROBE(true, "Zero fill within payload");
|
||||
CODE_PROBE(true, "Zero fill within payload", probe::decoration::rare);
|
||||
zeroFillSize = payloadSize + 1 - e.size();
|
||||
break;
|
||||
}
|
||||
|
@ -186,7 +186,7 @@ private:
|
|||
}
|
||||
}
|
||||
if (zeroFillSize) {
|
||||
CODE_PROBE(true, "Fixing a partial commit at the end of the tlog queue");
|
||||
CODE_PROBE(true, "Fixing a partial commit at the end of the tlog queue", probe::decoration::rare);
|
||||
for (int i = 0; i < zeroFillSize; i++)
|
||||
self->queue->push(StringRef((const uint8_t*)"", 1));
|
||||
}
|
||||
|
|
|
@ -289,11 +289,7 @@ ACTOR Future<Void> resolveBatch(Reference<Resolver> self,
|
|||
// Detect conflicts
|
||||
double expire = now() + SERVER_KNOBS->SAMPLE_EXPIRATION_TIME;
|
||||
ConflictBatch conflictBatch(self->conflictSet, &reply.conflictingKeyRangeMap, &reply.arena);
|
||||
Version newOldestVersion = req.version - SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS;
|
||||
if (g_network->isSimulated() && g_simulator->speedUpSimulation) {
|
||||
newOldestVersion = req.version - std::max(5 * SERVER_KNOBS->VERSIONS_PER_SECOND,
|
||||
SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS);
|
||||
}
|
||||
const Version newOldestVersion = req.version - SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS;
|
||||
for (int t = 0; t < req.transactions.size(); t++) {
|
||||
conflictBatch.addTransaction(req.transactions[t], newOldestVersion);
|
||||
self->resolvedReadConflictRanges += req.transactions[t].read_conflict_ranges.size();
|
||||
|
@ -372,7 +368,7 @@ ACTOR Future<Void> resolveBatch(Reference<Resolver> self,
|
|||
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION) ? &cipherKeys
|
||||
: nullptr);
|
||||
}
|
||||
CODE_PROBE(self->forceRecovery, "Resolver detects forced recovery");
|
||||
CODE_PROBE(self->forceRecovery, "Resolver detects forced recovery", probe::decoration::rare);
|
||||
}
|
||||
|
||||
self->resolvedStateTransactions += req.txnStateTransactions.size();
|
||||
|
|
|
@ -2283,6 +2283,19 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
|||
}
|
||||
deterministicRandom()->randomShuffle(coordinatorAddresses);
|
||||
|
||||
for (const auto& coordinators : extraCoordinatorAddresses) {
|
||||
for (int i = 0; i < (coordinators.size() / 2) + 1; i++) {
|
||||
TraceEvent("ProtectCoordinator")
|
||||
.detail("Address", coordinators[i])
|
||||
.detail("Coordinators", describe(coordinators));
|
||||
g_simulator->protectedAddresses.insert(
|
||||
NetworkAddress(coordinators[i].ip, coordinators[i].port, true, coordinators[i].isTLS()));
|
||||
if (coordinators[i].port == 2) {
|
||||
g_simulator->protectedAddresses.insert(NetworkAddress(coordinators[i].ip, 1, true, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(coordinatorAddresses.size(), coordinatorCount);
|
||||
ClusterConnectionString conn(coordinatorAddresses, "TestCluster:0"_sr);
|
||||
if (useHostname) {
|
||||
|
|
|
@ -172,7 +172,7 @@ private:
|
|||
|
||||
Standalone<StringRef> e = wait(self->queue->readNext(payloadSize + 1));
|
||||
if (e.size() != payloadSize + 1) {
|
||||
CODE_PROBE(true, "Zero fill within payload");
|
||||
CODE_PROBE(true, "Zero fill within payload", probe::decoration::rare);
|
||||
zeroFillSize = payloadSize + 1 - e.size();
|
||||
break;
|
||||
}
|
||||
|
@ -188,7 +188,7 @@ private:
|
|||
}
|
||||
}
|
||||
if (zeroFillSize) {
|
||||
CODE_PROBE(true, "Fixing a partial commit at the end of the tlog queue");
|
||||
CODE_PROBE(true, "Fixing a partial commit at the end of the tlog queue", probe::decoration::rare);
|
||||
for (int i = 0; i < zeroFillSize; i++)
|
||||
self->queue->push(StringRef((const uint8_t*)"", 1));
|
||||
}
|
||||
|
@ -1262,7 +1262,7 @@ ACTOR Future<Void> processPopRequests(TLogData* self, Reference<LogData> logData
|
|||
TraceEvent("PlayIgnoredPop", logData->logId).detail("Tag", tag.toString()).detail("Version", version);
|
||||
ignoredPops.push_back(tLogPopCore(self, tag, version, logData));
|
||||
if (++ignoredPopsPlayed % SERVER_KNOBS->TLOG_POP_BATCH_SIZE == 0) {
|
||||
CODE_PROBE(true, "Yielding while processing pop requests");
|
||||
CODE_PROBE(true, "Yielding while processing pop requests", probe::decoration::rare);
|
||||
wait(yield());
|
||||
}
|
||||
}
|
||||
|
@ -1857,7 +1857,8 @@ Future<Void> tLogPeekMessages(PromiseType replyPromise,
|
|||
}
|
||||
if (sequenceData.isSet()) {
|
||||
if (sequenceData.getFuture().get().first != rep.end) {
|
||||
CODE_PROBE(true, "tlog peek second attempt ended at a different version");
|
||||
CODE_PROBE(
|
||||
true, "tlog peek second attempt ended at a different version", probe::decoration::rare);
|
||||
replyPromise.sendError(operation_obsolete());
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -305,7 +305,7 @@ Reference<ILogSystem> TagPartitionedLogSystem::fromOldLogSystemConfig(UID const&
|
|||
return logSystem;
|
||||
}
|
||||
|
||||
void TagPartitionedLogSystem::toCoreState(DBCoreState& newState) {
|
||||
void TagPartitionedLogSystem::toCoreState(DBCoreState& newState) const {
|
||||
if (recoveryComplete.isValid() && recoveryComplete.isError())
|
||||
throw recoveryComplete.getError();
|
||||
|
||||
|
@ -343,11 +343,11 @@ void TagPartitionedLogSystem::toCoreState(DBCoreState& newState) {
|
|||
newState.logSystemType = logSystemType;
|
||||
}
|
||||
|
||||
bool TagPartitionedLogSystem::remoteStorageRecovered() {
|
||||
bool TagPartitionedLogSystem::remoteStorageRecovered() const {
|
||||
return remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isReady();
|
||||
}
|
||||
|
||||
Future<Void> TagPartitionedLogSystem::onCoreStateChanged() {
|
||||
Future<Void> TagPartitionedLogSystem::onCoreStateChanged() const {
|
||||
std::vector<Future<Void>> changes;
|
||||
changes.push_back(Never());
|
||||
if (recoveryComplete.isValid() && !recoveryComplete.isReady()) {
|
||||
|
@ -376,11 +376,11 @@ void TagPartitionedLogSystem::coreStateWritten(DBCoreState const& newState) {
|
|||
}
|
||||
}
|
||||
|
||||
Future<Void> TagPartitionedLogSystem::onError() {
|
||||
Future<Void> TagPartitionedLogSystem::onError() const {
|
||||
return onError_internal(this);
|
||||
}
|
||||
|
||||
ACTOR Future<Void> TagPartitionedLogSystem::onError_internal(TagPartitionedLogSystem* self) {
|
||||
ACTOR Future<Void> TagPartitionedLogSystem::onError_internal(TagPartitionedLogSystem const* self) {
|
||||
// Never returns normally, but throws an error if the subsystem stops working
|
||||
loop {
|
||||
std::vector<Future<Void>> failed;
|
||||
|
|
|
@ -92,6 +92,8 @@ static FILE* g_debugStream = stdout;
|
|||
#define TRACE \
|
||||
debug_printf_always("%s: %s line %d %s\n", __FUNCTION__, __FILE__, __LINE__, platform::get_backtrace().c_str());
|
||||
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
// Returns a string where every line in lines is prefixed with prefix
|
||||
std::string addPrefix(std::string prefix, std::string lines) {
|
||||
StringRef m = lines;
|
||||
|
@ -489,7 +491,7 @@ public:
|
|||
}
|
||||
|
||||
// Returns true if the mutex cannot be immediately taken.
|
||||
bool isBusy() { return !mutex.available(); }
|
||||
bool isBusy() const { return !mutex.available(); }
|
||||
|
||||
// Wait for all operations started before now to be ready, which is done by
|
||||
// obtaining and releasing the mutex.
|
||||
|
@ -1026,17 +1028,31 @@ public:
|
|||
// These pages are not encrypted
|
||||
page->postReadPayload(c.pageID);
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "RedwoodChecksumFailed")
|
||||
bool isInjected = false;
|
||||
if (g_network->isSimulated()) {
|
||||
auto num4kBlocks = std::max(self->pager->getPhysicalPageSize() / 4096, 1);
|
||||
auto startBlock = (c.pageID * self->pager->getPhysicalPageSize()) / 4096;
|
||||
auto iter = g_simulator->corruptedBlocks.lower_bound(
|
||||
std::make_pair(self->pager->getName(), startBlock));
|
||||
if (iter->first == self->pager->getName() && iter->second < startBlock + num4kBlocks) {
|
||||
isInjected = true;
|
||||
}
|
||||
}
|
||||
TraceEvent(isInjected ? SevWarnAlways : SevError, "RedwoodChecksumFailed")
|
||||
.error(e)
|
||||
.detail("PageID", c.pageID)
|
||||
.detail("PageSize", self->pager->getPhysicalPageSize())
|
||||
.detail("Offset", c.pageID * self->pager->getPhysicalPageSize());
|
||||
.detail("Offset", c.pageID * self->pager->getPhysicalPageSize())
|
||||
.detail("Filename", self->pager->getName());
|
||||
|
||||
debug_printf("FIFOQueue::Cursor(%s) peekALLExt getSubPage error=%s for %s. Offset %d ",
|
||||
c.toString().c_str(),
|
||||
e.what(),
|
||||
toString(c.pageID).c_str(),
|
||||
c.pageID * self->pager->getPhysicalPageSize());
|
||||
if (isInjected) {
|
||||
throw e.asInjectedFault();
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
|
@ -1168,7 +1184,7 @@ public:
|
|||
headWriter.write(item);
|
||||
}
|
||||
|
||||
bool isBusy() {
|
||||
bool isBusy() const {
|
||||
return headWriter.isBusy() || headReader.isBusy() || tailWriter.isBusy() || !newTailPage.isReady();
|
||||
}
|
||||
|
||||
|
@ -2025,7 +2041,8 @@ public:
|
|||
bool memoryOnly,
|
||||
Reference<IPageEncryptionKeyProvider> keyProvider,
|
||||
Promise<Void> errorPromise = {})
|
||||
: keyProvider(keyProvider), ioLock(FLOW_KNOBS->MAX_OUTSTANDING, SERVER_KNOBS->REDWOOD_PRIORITY_LAUNCHS),
|
||||
: keyProvider(keyProvider),
|
||||
ioLock(makeReference<PriorityMultiLock>(FLOW_KNOBS->MAX_OUTSTANDING, SERVER_KNOBS->REDWOOD_IO_PRIORITIES)),
|
||||
pageCacheBytes(pageCacheSizeBytes), desiredPageSize(desiredPageSize), desiredExtentSize(desiredExtentSize),
|
||||
filename(filename), memoryOnly(memoryOnly), errorPromise(errorPromise),
|
||||
remapCleanupWindowBytes(remapCleanupWindowBytes), concurrentExtentReads(new FlowLock(concurrentExtentReads)) {
|
||||
|
@ -2037,7 +2054,7 @@ public:
|
|||
// This sets the page cache size for all PageCacheT instances using the same evictor
|
||||
pageCache.evictor().sizeLimit = pageCacheBytes;
|
||||
|
||||
g_redwoodMetrics.ioLock = &ioLock;
|
||||
g_redwoodMetrics.ioLock = ioLock.getPtr();
|
||||
if (!g_redwoodMetricsActor.isValid()) {
|
||||
g_redwoodMetricsActor = redwoodMetricsLogger();
|
||||
}
|
||||
|
@ -2499,7 +2516,7 @@ public:
|
|||
unsigned int level,
|
||||
bool header) {
|
||||
|
||||
state PriorityMultiLock::Lock lock = wait(self->ioLock.lock(header ? ioMaxPriority : ioMinPriority));
|
||||
state PriorityMultiLock::Lock lock = wait(self->ioLock->lock(header ? ioMaxPriority : ioMinPriority));
|
||||
++g_redwoodMetrics.metric.pagerDiskWrite;
|
||||
g_redwoodMetrics.level(level).metrics.events.addEventReason(PagerEvents::PageWrite, reason);
|
||||
if (self->memoryOnly) {
|
||||
|
@ -2779,7 +2796,7 @@ public:
|
|||
int blockSize,
|
||||
int64_t offset,
|
||||
int priority) {
|
||||
state PriorityMultiLock::Lock lock = wait(self->ioLock.lock(std::min(priority, ioMaxPriority)));
|
||||
state PriorityMultiLock::Lock lock = wait(self->ioLock->lock(std::min(priority, ioMaxPriority)));
|
||||
++g_redwoodMetrics.metric.pagerDiskRead;
|
||||
int bytes = wait(self->pageFile->read(pageBuffer->rawData() + pageOffset, blockSize, offset));
|
||||
return bytes;
|
||||
|
@ -3593,7 +3610,7 @@ public:
|
|||
|
||||
// The next section explicitly cancels all pending operations held in the pager
|
||||
debug_printf("DWALPager(%s) shutdown kill ioLock\n", self->filename.c_str());
|
||||
self->ioLock.kill();
|
||||
self->ioLock->kill();
|
||||
|
||||
debug_printf("DWALPager(%s) shutdown cancel recovery\n", self->filename.c_str());
|
||||
self->recoverFuture.cancel();
|
||||
|
@ -3802,7 +3819,7 @@ private:
|
|||
|
||||
Reference<IPageEncryptionKeyProvider> keyProvider;
|
||||
|
||||
PriorityMultiLock ioLock;
|
||||
Reference<PriorityMultiLock> ioLock;
|
||||
|
||||
int64_t pageCacheBytes;
|
||||
|
||||
|
@ -8894,32 +8911,25 @@ void RedwoodMetrics::getIOLockFields(TraceEvent* e, std::string* s) {
|
|||
int maxPriority = ioLock->maxPriority();
|
||||
|
||||
if (e != nullptr) {
|
||||
e->detail("ActiveReads", ioLock->totalRunners());
|
||||
e->detail("AwaitReads", ioLock->totalWaiters());
|
||||
e->detail("IOActiveTotal", ioLock->getRunnersCount());
|
||||
e->detail("IOWaitingTotal", ioLock->getWaitersCount());
|
||||
|
||||
for (int priority = 0; priority <= maxPriority; ++priority) {
|
||||
e->detail(format("ActiveP%d", priority), ioLock->numRunners(priority));
|
||||
e->detail(format("AwaitP%d", priority), ioLock->numWaiters(priority));
|
||||
e->detail(format("IOActiveP%d", priority), ioLock->getRunnersCount(priority));
|
||||
e->detail(format("IOWaitingP%d", priority), ioLock->getWaitersCount(priority));
|
||||
}
|
||||
}
|
||||
|
||||
if (s != nullptr) {
|
||||
std::string active = "Active";
|
||||
std::string await = "Await";
|
||||
|
||||
*s += "\n";
|
||||
*s += format("%-15s %-8u ", "ActiveReads", ioLock->totalRunners());
|
||||
*s += format("%-15s %-8u ", "AwaitReads", ioLock->totalWaiters());
|
||||
*s += "\n";
|
||||
|
||||
*s += format("%-15s %-8u ", "IOActiveTotal", ioLock->getRunnersCount());
|
||||
for (int priority = 0; priority <= maxPriority; ++priority) {
|
||||
*s +=
|
||||
format("%-15s %-8u ", (active + 'P' + std::to_string(priority)).c_str(), ioLock->numRunners(priority));
|
||||
*s += format("IOActiveP%-6d %-8u ", priority, ioLock->getRunnersCount(priority));
|
||||
}
|
||||
*s += "\n";
|
||||
*s += format("%-15s %-8u ", "IOWaitingTotal", ioLock->getWaitersCount());
|
||||
for (int priority = 0; priority <= maxPriority; ++priority) {
|
||||
*s +=
|
||||
format("%-15s %-8u ", (await + 'P' + std::to_string(priority)).c_str(), ioLock->numWaiters(priority));
|
||||
*s += format("IOWaitingP%-5d %-8u ", priority, ioLock->getWaitersCount(priority));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -11407,57 +11417,3 @@ TEST_CASE(":/redwood/performance/histograms") {
|
|||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> waitLockIncrement(PriorityMultiLock* pml, int priority, int* pout) {
|
||||
state PriorityMultiLock::Lock lock = wait(pml->lock(priority));
|
||||
wait(delay(deterministicRandom()->random01() * .1));
|
||||
++*pout;
|
||||
return Void();
|
||||
}
|
||||
|
||||
TEST_CASE("/redwood/PriorityMultiLock") {
|
||||
state std::vector<int> priorities = { 10, 20, 40 };
|
||||
state int concurrency = 25;
|
||||
state PriorityMultiLock* pml = new PriorityMultiLock(concurrency, priorities);
|
||||
state std::vector<int> counts;
|
||||
counts.resize(priorities.size(), 0);
|
||||
|
||||
// Clog the lock buy taking concurrency locks at each level
|
||||
state std::vector<Future<PriorityMultiLock::Lock>> lockFutures;
|
||||
for (int i = 0; i < priorities.size(); ++i) {
|
||||
for (int j = 0; j < concurrency; ++j) {
|
||||
lockFutures.push_back(pml->lock(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for n = concurrency locks to be acquired
|
||||
wait(quorum(lockFutures, concurrency));
|
||||
|
||||
state std::vector<Future<Void>> futures;
|
||||
for (int i = 0; i < 10e3; ++i) {
|
||||
int p = i % priorities.size();
|
||||
futures.push_back(waitLockIncrement(pml, p, &counts[p]));
|
||||
}
|
||||
|
||||
state Future<Void> f = waitForAll(futures);
|
||||
|
||||
// Release the locks
|
||||
lockFutures.clear();
|
||||
|
||||
// Print stats and wait for all futures to be ready
|
||||
loop {
|
||||
choose {
|
||||
when(wait(delay(1))) {
|
||||
printf("counts: ");
|
||||
for (auto c : counts) {
|
||||
printf("%d ", c);
|
||||
}
|
||||
printf(" pml: %s\n", pml->toString().c_str());
|
||||
}
|
||||
when(wait(f)) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
delete pml;
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -60,6 +60,7 @@ class GrvProxyTagThrottler {
|
|||
void setRate(double rate);
|
||||
bool isMaxThrottled(double maxThrottleDuration) const;
|
||||
void rejectRequests(LatencyBandsMap&);
|
||||
void endReleaseWindow(int64_t numStarted, double elapsed);
|
||||
};
|
||||
|
||||
// Track the budgets for each tag
|
||||
|
|
|
@ -55,7 +55,7 @@ public:
|
|||
|
||||
// Updates the budget to accumulate any extra capacity available or remove any excess that was used.
|
||||
// Call at the end of a release window.
|
||||
void endReleaseWindow(int64_t numStartedAtPriority, bool queueEmptyAtPriority, double elapsed);
|
||||
void endReleaseWindow(int64_t numStarted, bool queueEmpty, double elapsed);
|
||||
|
||||
// Smoothly sets rate. If currently disabled, reenable
|
||||
void setRate(double rate);
|
||||
|
|
|
@ -163,7 +163,7 @@ struct ILogSystem {
|
|||
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) = 0;
|
||||
|
||||
// returns when the failure monitor detects that the servers associated with the cursor are failed
|
||||
virtual Future<Void> onFailed() = 0;
|
||||
virtual Future<Void> onFailed() const = 0;
|
||||
|
||||
// returns false if:
|
||||
// (1) the failure monitor detects that the servers associated with the cursor is failed
|
||||
|
@ -251,7 +251,7 @@ struct ILogSystem {
|
|||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
Future<Void> onFailed() const override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
|
@ -313,7 +313,7 @@ struct ILogSystem {
|
|||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
Future<Void> onFailed() const override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
|
@ -369,7 +369,7 @@ struct ILogSystem {
|
|||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
Future<Void> onFailed() const override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
|
@ -401,7 +401,7 @@ struct ILogSystem {
|
|||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
Future<Void> onFailed() const override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
|
@ -480,7 +480,7 @@ struct ILogSystem {
|
|||
VectorRef<Tag> getTags() const override;
|
||||
void advanceTo(LogMessageVersion n) override;
|
||||
Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) override;
|
||||
Future<Void> onFailed() override;
|
||||
Future<Void> onFailed() const override;
|
||||
bool isActive() const override;
|
||||
bool isExhausted() const override;
|
||||
const LogMessageVersion& version() const override;
|
||||
|
@ -500,18 +500,18 @@ struct ILogSystem {
|
|||
virtual std::string describe() const = 0;
|
||||
virtual UID getDebugID() const = 0;
|
||||
|
||||
virtual void toCoreState(DBCoreState&) = 0;
|
||||
virtual void toCoreState(DBCoreState&) const = 0;
|
||||
|
||||
virtual bool remoteStorageRecovered() = 0;
|
||||
virtual bool remoteStorageRecovered() const = 0;
|
||||
|
||||
virtual Future<Void> onCoreStateChanged() = 0;
|
||||
virtual Future<Void> onCoreStateChanged() const = 0;
|
||||
// Returns if and when the output of toCoreState() would change (for example, when older logs can be discarded from
|
||||
// the state)
|
||||
|
||||
virtual void coreStateWritten(DBCoreState const& newState) = 0;
|
||||
// Called when a core state has been written to the coordinators
|
||||
|
||||
virtual Future<Void> onError() = 0;
|
||||
virtual Future<Void> onError() const = 0;
|
||||
// Never returns normally, but throws an error if the subsystem stops working
|
||||
|
||||
// Future<Void> push( UID bundle, int64_t seq, VectorRef<TaggedMessageRef> messages );
|
||||
|
@ -791,10 +791,10 @@ struct LogPushData : NonCopyable {
|
|||
template <class T>
|
||||
void writeTypedMessage(T const& item, bool metadataMessage = false, bool allLocations = false);
|
||||
|
||||
Standalone<StringRef> getMessages(int loc) { return messagesWriter[loc].toValue(); }
|
||||
Standalone<StringRef> getMessages(int loc) const { return messagesWriter[loc].toValue(); }
|
||||
|
||||
// Returns all locations' messages, including empty ones.
|
||||
std::vector<Standalone<StringRef>> getAllMessages();
|
||||
std::vector<Standalone<StringRef>> getAllMessages() const;
|
||||
|
||||
// Records if a tlog (specified by "loc") will receive an empty version batch message.
|
||||
// "value" is the message returned by getMessages() call.
|
||||
|
|
|
@ -208,7 +208,7 @@ class Ratekeeper {
|
|||
Deque<std::pair<double, Version>> blobWorkerVersionHistory;
|
||||
Optional<Key> remoteDC;
|
||||
|
||||
double getRecoveryDuration(Version ver) {
|
||||
double getRecoveryDuration(Version ver) const {
|
||||
auto it = version_recovery.lower_bound(ver);
|
||||
double recoveryDuration = 0;
|
||||
while (it != version_recovery.end()) {
|
||||
|
|
|
@ -185,7 +185,7 @@ struct StagingKey {
|
|||
}
|
||||
|
||||
// Does the key has at least 1 set or clear mutation to get the base value
|
||||
bool hasBaseValue() {
|
||||
bool hasBaseValue() const {
|
||||
if (version.version > 0) {
|
||||
ASSERT(type == MutationRef::SetValue || type == MutationRef::ClearRange);
|
||||
}
|
||||
|
@ -193,12 +193,12 @@ struct StagingKey {
|
|||
}
|
||||
|
||||
// Has all pendingMutations been pre-applied to the val?
|
||||
bool hasPrecomputed() {
|
||||
bool hasPrecomputed() const {
|
||||
ASSERT(pendingMutations.empty() || pendingMutations.rbegin()->first >= pendingMutations.begin()->first);
|
||||
return pendingMutations.empty() || version >= pendingMutations.rbegin()->first;
|
||||
}
|
||||
|
||||
int totalSize() { return MutationRef::OVERHEAD_BYTES + key.size() + val.size(); }
|
||||
int totalSize() const { return MutationRef::OVERHEAD_BYTES + key.size() + val.size(); }
|
||||
};
|
||||
|
||||
// The range mutation received on applier.
|
||||
|
@ -231,7 +231,7 @@ public:
|
|||
|
||||
void operator=(int newState) override { vbState = newState; }
|
||||
|
||||
int get() override { return vbState; }
|
||||
int get() const override { return vbState; }
|
||||
};
|
||||
|
||||
struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
|
||||
|
@ -324,7 +324,7 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
|
|||
dbApplier = Optional<Future<Void>>();
|
||||
}
|
||||
|
||||
void sanityCheckMutationOps() {
|
||||
void sanityCheckMutationOps() const {
|
||||
if (kvOps.empty())
|
||||
return;
|
||||
|
||||
|
@ -332,7 +332,7 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
|
|||
ASSERT_WE_THINK(allOpsAreKnown());
|
||||
}
|
||||
|
||||
bool isKVOpsSorted() {
|
||||
bool isKVOpsSorted() const {
|
||||
auto prev = kvOps.begin();
|
||||
for (auto it = kvOps.begin(); it != kvOps.end(); ++it) {
|
||||
if (prev->first > it->first) {
|
||||
|
@ -343,7 +343,7 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool allOpsAreKnown() {
|
||||
bool allOpsAreKnown() const {
|
||||
for (auto it = kvOps.begin(); it != kvOps.end(); ++it) {
|
||||
for (auto m = it->second.begin(); m != it->second.end(); ++m) {
|
||||
if (m->type == MutationRef::SetValue || m->type == MutationRef::ClearRange ||
|
||||
|
@ -380,8 +380,8 @@ struct RestoreApplierData : RestoreRoleData, public ReferenceCounted<RestoreAppl
|
|||
|
||||
// getVersionBatchState may be called periodically to dump version batch state,
|
||||
// even when no version batch has been started.
|
||||
int getVersionBatchState(int batchIndex) final {
|
||||
std::map<int, Reference<ApplierBatchData>>::iterator item = batch.find(batchIndex);
|
||||
int getVersionBatchState(int batchIndex) const final {
|
||||
auto item = batch.find(batchIndex);
|
||||
if (item == batch.end()) { // Batch has not been initialized when we blindly profile the state
|
||||
return ApplierVersionBatchState::INVALID;
|
||||
} else {
|
||||
|
@ -404,7 +404,7 @@ struct RestoreApplierData : RestoreRoleData, public ReferenceCounted<RestoreAppl
|
|||
finishedBatch = NotifiedVersion(0);
|
||||
}
|
||||
|
||||
std::string describeNode() override {
|
||||
std::string describeNode() const override {
|
||||
std::stringstream ss;
|
||||
ss << "NodeID:" << nodeID.toString() << " nodeIndex:" << nodeIndex;
|
||||
return ss.str();
|
||||
|
|
|
@ -56,7 +56,7 @@ struct VersionBatch {
|
|||
std::tie(rhs.batchIndex, rhs.beginVersion, rhs.endVersion, rhs.logFiles, rhs.rangeFiles, rhs.size);
|
||||
}
|
||||
|
||||
bool isEmpty() { return logFiles.empty() && rangeFiles.empty(); }
|
||||
bool isEmpty() const { return logFiles.empty() && rangeFiles.empty(); }
|
||||
void reset() {
|
||||
beginVersion = 0;
|
||||
endVersion = 0;
|
||||
|
@ -164,7 +164,7 @@ struct RestoreControllerData : RestoreRoleData, public ReferenceCounted<RestoreC
|
|||
|
||||
~RestoreControllerData() override = default;
|
||||
|
||||
int getVersionBatchState(int batchIndex) final { return RoleVersionBatchState::INVALID; }
|
||||
int getVersionBatchState(int batchIndex) const final { return RoleVersionBatchState::INVALID; }
|
||||
void setVersionBatchState(int batchIndex, int vbState) final {}
|
||||
|
||||
void initVersionBatch(int batchIndex) override {
|
||||
|
@ -182,13 +182,13 @@ struct RestoreControllerData : RestoreRoleData, public ReferenceCounted<RestoreC
|
|||
ASSERT(runningVersionBatches.get() == 0);
|
||||
}
|
||||
|
||||
std::string describeNode() override {
|
||||
std::string describeNode() const override {
|
||||
std::stringstream ss;
|
||||
ss << "Controller";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
void dumpVersionBatches(const std::map<Version, VersionBatch>& versionBatches) {
|
||||
void dumpVersionBatches(const std::map<Version, VersionBatch>& versionBatches) const {
|
||||
int i = 1;
|
||||
double rangeFiles = 0;
|
||||
double rangeSize = 0;
|
||||
|
|
|
@ -56,7 +56,7 @@ public:
|
|||
|
||||
void operator=(int newState) override { vbState = newState; }
|
||||
|
||||
int get() override { return vbState; }
|
||||
int get() const override { return vbState; }
|
||||
};
|
||||
|
||||
struct LoaderBatchData : public ReferenceCounted<LoaderBatchData> {
|
||||
|
@ -193,15 +193,15 @@ struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoade
|
|||
|
||||
~RestoreLoaderData() override = default;
|
||||
|
||||
std::string describeNode() override {
|
||||
std::string describeNode() const override {
|
||||
std::stringstream ss;
|
||||
ss << "[Role: Loader] [NodeID:" << nodeID.toString().c_str() << "] [NodeIndex:" << std::to_string(nodeIndex)
|
||||
<< "]";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
int getVersionBatchState(int batchIndex) final {
|
||||
std::map<int, Reference<LoaderBatchData>>::iterator item = batch.find(batchIndex);
|
||||
int getVersionBatchState(int batchIndex) const final {
|
||||
auto item = batch.find(batchIndex);
|
||||
if (item == batch.end()) { // Batch has not been initialized when we blindly profile the state
|
||||
return LoaderVersionBatchState::INVALID;
|
||||
} else {
|
||||
|
|
|
@ -69,7 +69,7 @@ class RoleVersionBatchState {
|
|||
public:
|
||||
static const int INVALID = -1;
|
||||
|
||||
virtual int get() { return vbState; }
|
||||
virtual int get() const { return vbState; }
|
||||
|
||||
virtual void operator=(int newState) { vbState = newState; }
|
||||
|
||||
|
@ -109,7 +109,7 @@ public:
|
|||
|
||||
virtual void initVersionBatch(int batchIndex) = 0;
|
||||
virtual void resetPerRestoreRequest() = 0;
|
||||
virtual int getVersionBatchState(int batchIndex) = 0;
|
||||
virtual int getVersionBatchState(int batchIndex) const = 0;
|
||||
virtual void setVersionBatchState(int batchIndex, int vbState) = 0;
|
||||
|
||||
void clearInterfaces() {
|
||||
|
@ -117,7 +117,7 @@ public:
|
|||
appliersInterf.clear();
|
||||
}
|
||||
|
||||
virtual std::string describeNode() = 0;
|
||||
virtual std::string describeNode() const = 0;
|
||||
};
|
||||
|
||||
void updateProcessStats(Reference<RestoreRoleData> self);
|
||||
|
|
|
@ -189,7 +189,7 @@ Future<Void> serveStorageMetricsRequests(ServiceType* self, StorageServerInterfa
|
|||
choose {
|
||||
when(state WaitMetricsRequest req = waitNext(ssi.waitMetrics.getFuture())) {
|
||||
if (!req.tenantInfo.present() && !self->isReadable(req.keys)) {
|
||||
CODE_PROBE(true, "waitMetrics immediate wrong_shard_server()");
|
||||
CODE_PROBE(true, "waitMetrics immediate wrong_shard_server()", probe::decoration::rare);
|
||||
self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
|
||||
} else {
|
||||
self->addActor(self->waitMetricsTenantAware(req));
|
||||
|
@ -231,4 +231,4 @@ Future<Void> serveStorageMetricsRequests(ServiceType* self, StorageServerInterfa
|
|||
}
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif // FDBSERVER_STORAGEMETRICS_H
|
||||
#endif // FDBSERVER_STORAGEMETRICS_H
|
||||
|
|
|
@ -170,17 +170,17 @@ struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted<TagPartition
|
|||
LogSystemConfig const& lsConf);
|
||||
|
||||
// Convert TagPartitionedLogSystem to DBCoreState and override input newState as return value
|
||||
void toCoreState(DBCoreState& newState) final;
|
||||
void toCoreState(DBCoreState& newState) const final;
|
||||
|
||||
bool remoteStorageRecovered() final;
|
||||
bool remoteStorageRecovered() const final;
|
||||
|
||||
Future<Void> onCoreStateChanged() final;
|
||||
Future<Void> onCoreStateChanged() const final;
|
||||
|
||||
void coreStateWritten(DBCoreState const& newState) final;
|
||||
|
||||
Future<Void> onError() final;
|
||||
Future<Void> onError() const final;
|
||||
|
||||
ACTOR static Future<Void> onError_internal(TagPartitionedLogSystem* self);
|
||||
ACTOR static Future<Void> onError_internal(TagPartitionedLogSystem const* self);
|
||||
|
||||
ACTOR static Future<Void> pushResetChecker(Reference<ConnectionResetInfo> self, NetworkAddress addr);
|
||||
|
||||
|
|
|
@ -344,7 +344,7 @@ struct ApiWorkload : TestWorkload {
|
|||
virtual Future<Void> performTest(Database const& cx, Standalone<VectorRef<KeyValueRef>> const& data) = 0;
|
||||
|
||||
// Returns whether or not success is false
|
||||
bool hasFailed();
|
||||
bool hasFailed() const;
|
||||
|
||||
// Clears the keyspace used by this test
|
||||
Future<Void> clearKeyspace();
|
||||
|
|
|
@ -567,6 +567,8 @@ struct ChangeFeedInfo : ReferenceCounted<ChangeFeedInfo> {
|
|||
// back, we can avoid notifying other SS of change feeds that don't durably exist
|
||||
Version metadataCreateVersion = invalidVersion;
|
||||
|
||||
FlowLock fetchLock = FlowLock(1);
|
||||
|
||||
bool removing = false;
|
||||
bool destroyed = false;
|
||||
|
||||
|
@ -1004,7 +1006,7 @@ public:
|
|||
// investigate, but preventing a new storage process from replacing the TSS on the worker. It will still get removed
|
||||
// from the cluster if it falls behind on the mutation stream, or if its tss pair gets removed and its tag is no
|
||||
// longer valid.
|
||||
bool isTSSInQuarantine() { return tssPairID.present() && tssInQuarantine; }
|
||||
bool isTSSInQuarantine() const { return tssPairID.present() && tssInQuarantine; }
|
||||
|
||||
void startTssQuarantine() {
|
||||
if (!tssInQuarantine) {
|
||||
|
@ -1054,6 +1056,11 @@ public:
|
|||
// when the disk permits
|
||||
NotifiedVersion oldestVersion; // See also storageVersion()
|
||||
NotifiedVersion durableVersion; // At least this version will be readable from storage after a power failure
|
||||
// In the event of disk corruption, sqlite and redwood will either not recover, recover to durableVersion
|
||||
// but be unable to read some data, or they could lose the last commit. If we lose the last commit, the storage
|
||||
// might not be able to peek from the tlog (depending on when it sent the last pop). So this version just keeps
|
||||
// track of the version we committed to the storage engine before we did commit durableVersion.
|
||||
Version storageMinRecoverVersion = 0;
|
||||
Version rebootAfterDurableVersion;
|
||||
int8_t primaryLocality;
|
||||
NotifiedVersion knownCommittedVersion;
|
||||
|
@ -1110,15 +1117,13 @@ public:
|
|||
|
||||
FlowLock serveFetchCheckpointParallelismLock;
|
||||
|
||||
PriorityMultiLock ssLock;
|
||||
Reference<PriorityMultiLock> ssLock;
|
||||
std::vector<int> readPriorityRanks;
|
||||
|
||||
Future<PriorityMultiLock::Lock> getReadLock(const Optional<ReadOptions>& options) {
|
||||
// TODO: Fix perf regression in 100% cache read case where taking this lock adds too much overhead
|
||||
return PriorityMultiLock::Lock();
|
||||
// int readType = (int)(options.present() ? options.get().type : ReadType::NORMAL);
|
||||
// readType = std::clamp<int>(readType, 0, readPriorityRanks.size() - 1);
|
||||
// return ssLock.lock(readPriorityRanks[readType]);
|
||||
int readType = (int)(options.present() ? options.get().type : ReadType::NORMAL);
|
||||
readType = std::clamp<int>(readType, 0, readPriorityRanks.size() - 1);
|
||||
return ssLock->lock(readPriorityRanks[readType]);
|
||||
}
|
||||
|
||||
FlowLock serveAuditStorageParallelismLock;
|
||||
|
@ -1407,7 +1412,8 @@ public:
|
|||
fetchKeysParallelismFullLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_FULL),
|
||||
fetchKeysBytesBudget(SERVER_KNOBS->STORAGE_FETCH_BYTES), fetchKeysBudgetUsed(false),
|
||||
serveFetchCheckpointParallelismLock(SERVER_KNOBS->SERVE_FETCH_CHECKPOINT_PARALLELISM),
|
||||
ssLock(SERVER_KNOBS->STORAGE_SERVER_READ_CONCURRENCY, SERVER_KNOBS->STORAGESERVER_READ_PRIORITIES),
|
||||
ssLock(makeReference<PriorityMultiLock>(SERVER_KNOBS->STORAGE_SERVER_READ_CONCURRENCY,
|
||||
SERVER_KNOBS->STORAGESERVER_READ_PRIORITIES)),
|
||||
serveAuditStorageParallelismLock(SERVER_KNOBS->SERVE_AUDIT_STORAGE_PARALLELISM),
|
||||
instanceID(deterministicRandom()->randomUniqueID().first()), shuttingDown(false), behind(false),
|
||||
versionBehind(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), lastBytesInputEBrake(0),
|
||||
|
@ -1415,7 +1421,7 @@ public:
|
|||
busiestWriteTagContext(ssi.id()), counters(this),
|
||||
storageServerSourceTLogIDEventHolder(
|
||||
makeReference<EventCacheHolder>(ssi.id().toString() + "/StorageServerSourceTLogID")) {
|
||||
readPriorityRanks = parseStringToVector<int>(SERVER_KNOBS->STORAGESERVER_READ_RANKS, ',');
|
||||
readPriorityRanks = parseStringToVector<int>(SERVER_KNOBS->STORAGESERVER_READTYPE_PRIORITY_MAP, ',');
|
||||
ASSERT(readPriorityRanks.size() > (int)ReadType::MAX);
|
||||
version.initMetric("StorageServer.Version"_sr, counters.cc.getId());
|
||||
oldestVersion.initMetric("StorageServer.OldestVersion"_sr, counters.cc.getId());
|
||||
|
@ -1509,6 +1515,7 @@ public:
|
|||
desiredOldestVersion = ver;
|
||||
oldestVersion = ver;
|
||||
durableVersion = ver;
|
||||
storageMinRecoverVersion = ver;
|
||||
lastVersionWithData = ver;
|
||||
restoredVersion = ver;
|
||||
|
||||
|
@ -5687,6 +5694,7 @@ bool changeDurableVersion(StorageServer* data, Version desiredDurableVersion) {
|
|||
data->freeable.erase(data->freeable.begin(), data->freeable.lower_bound(nextDurableVersion));
|
||||
|
||||
Future<Void> checkFatalError = data->otherError.getFuture();
|
||||
data->storageMinRecoverVersion = data->durableVersion.get();
|
||||
data->durableVersion.set(nextDurableVersion);
|
||||
setDataDurableVersion(data->thisServerID, data->durableVersion.get());
|
||||
if (checkFatalError.isReady())
|
||||
|
@ -6309,6 +6317,15 @@ ACTOR Future<Version> fetchChangeFeedApplier(StorageServer* data,
|
|||
Version beginVersion,
|
||||
Version endVersion,
|
||||
ReadOptions readOptions) {
|
||||
state FlowLock::Releaser feedFetchReleaser;
|
||||
|
||||
// avoid fetching the same version range of the same change feed multiple times.
|
||||
choose {
|
||||
when(wait(changeFeedInfo->fetchLock.take())) {
|
||||
feedFetchReleaser = FlowLock::Releaser(changeFeedInfo->fetchLock);
|
||||
}
|
||||
when(wait(changeFeedInfo->durableFetchVersion.whenAtLeast(endVersion))) { return invalidVersion; }
|
||||
}
|
||||
|
||||
state Version startVersion = beginVersion;
|
||||
startVersion = std::max(startVersion, emptyVersion + 1);
|
||||
|
@ -6328,6 +6345,7 @@ ACTOR Future<Version> fetchChangeFeedApplier(StorageServer* data,
|
|||
return invalidVersion;
|
||||
}
|
||||
|
||||
// FIXME: if this feed range is not wholly contained within the shard, set cache to true on reading
|
||||
state Reference<ChangeFeedData> feedResults = makeReference<ChangeFeedData>();
|
||||
state Future<Void> feed = data->cx->getChangeFeedStream(feedResults,
|
||||
rangeId,
|
||||
|
@ -6843,6 +6861,16 @@ ACTOR Future<std::vector<Key>> fetchChangeFeedMetadata(StorageServer* data,
|
|||
return feedIds;
|
||||
}
|
||||
|
||||
// Derives the ReadOptions to pass to fetchChangeFeed for a single feed.
// options: the caller's base read options; never modified.
// keys / feedRange: the key range being fetched and the feed's own range.
// Returns `options` unchanged when feedRange does not contain keys; otherwise returns a copy of
// `options` with cacheResult set to true.
ReadOptions readOptionsForFeedFetch(const ReadOptions& options, const KeyRangeRef& keys, const KeyRangeRef& feedRange) {
|
||||
if (!feedRange.contains(keys)) {
|
||||
return options;
|
||||
}
|
||||
// If feed range wholly contains shard range, cache on fetch because other shards will likely also fetch it
|
||||
ReadOptions newOptions = options;
|
||||
newOptions.cacheResult = true;
|
||||
return newOptions;
|
||||
}
|
||||
|
||||
// returns max version fetched for each feed
|
||||
// newFeedIds is used for the second fetch to get data for new feeds that weren't there for the first fetch
|
||||
ACTOR Future<std::unordered_map<Key, Version>> dispatchChangeFeeds(StorageServer* data,
|
||||
|
@ -6867,8 +6895,9 @@ ACTOR Future<std::unordered_map<Key, Version>> dispatchChangeFeeds(StorageServer
|
|||
auto feedIt = data->uidChangeFeed.find(feedId);
|
||||
// feed may have been moved away or deleted after move was scheduled, do nothing in that case
|
||||
if (feedIt != data->uidChangeFeed.end() && !feedIt->second->removing) {
|
||||
ReadOptions fetchReadOptions = readOptionsForFeedFetch(readOptions, keys, feedIt->second->range);
|
||||
feedFetches[feedIt->second->id] =
|
||||
fetchChangeFeed(data, feedIt->second, beginVersion, endVersion, readOptions);
|
||||
fetchChangeFeed(data, feedIt->second, beginVersion, endVersion, fetchReadOptions);
|
||||
}
|
||||
}
|
||||
for (auto& feedId : newFeedIds) {
|
||||
|
@ -6876,7 +6905,8 @@ ACTOR Future<std::unordered_map<Key, Version>> dispatchChangeFeeds(StorageServer
|
|||
// we just read the change feed data map earlier in fetchKeys without yielding, so these feeds must exist
|
||||
ASSERT(feedIt != data->uidChangeFeed.end());
|
||||
ASSERT(!feedIt->second->removing);
|
||||
feedFetches[feedIt->second->id] = fetchChangeFeed(data, feedIt->second, 0, endVersion, readOptions);
|
||||
ReadOptions fetchReadOptions = readOptionsForFeedFetch(readOptions, keys, feedIt->second->range);
|
||||
feedFetches[feedIt->second->id] = fetchChangeFeed(data, feedIt->second, 0, endVersion, fetchReadOptions);
|
||||
}
|
||||
|
||||
loop {
|
||||
|
@ -9426,7 +9456,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
|
|||
wait(ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME));
|
||||
data->storageCommitLatencyHistogram->sampleSeconds(now() - beforeStorageCommit);
|
||||
|
||||
debug_advanceMinCommittedVersion(data->thisServerID, newOldestVersion);
|
||||
debug_advanceMinCommittedVersion(data->thisServerID, data->storageMinRecoverVersion);
|
||||
|
||||
if (removeKVSRanges) {
|
||||
TraceEvent(SevDebug, "RemoveKVSRangesComitted", data->thisServerID)
|
||||
|
@ -9568,7 +9598,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
|
|||
// loaded.
|
||||
state double beforeSSDurableVersionUpdate = now();
|
||||
wait(data->durableVersionLock.take());
|
||||
data->popVersion(data->durableVersion.get() + 1);
|
||||
data->popVersion(data->storageMinRecoverVersion + 1);
|
||||
|
||||
while (!changeDurableVersion(data, newOldestVersion)) {
|
||||
if (g_network->check_yield(TaskPriority::UpdateStorage)) {
|
||||
|
@ -10431,20 +10461,20 @@ ACTOR Future<Void> metricsCore(StorageServer* self, StorageServerInterface ssi)
|
|||
te.detail("StorageEngine", self->storage.getKeyValueStoreType().toString());
|
||||
te.detail("Tag", self->tag.toString());
|
||||
std::vector<int> rpr = self->readPriorityRanks;
|
||||
te.detail("ReadsActive", self->ssLock.totalRunners());
|
||||
te.detail("ReadsWaiting", self->ssLock.totalWaiters());
|
||||
te.detail("ReadsTotalActive", self->ssLock->getRunnersCount());
|
||||
te.detail("ReadsTotalWaiting", self->ssLock->getWaitersCount());
|
||||
int type = (int)ReadType::FETCH;
|
||||
te.detail("ReadFetchActive", self->ssLock.numRunners(rpr[type]));
|
||||
te.detail("ReadFetchWaiting", self->ssLock.numWaiters(rpr[type]));
|
||||
te.detail("ReadFetchActive", self->ssLock->getRunnersCount(rpr[type]));
|
||||
te.detail("ReadFetchWaiting", self->ssLock->getWaitersCount(rpr[type]));
|
||||
type = (int)ReadType::LOW;
|
||||
te.detail("ReadLowActive", self->ssLock.numRunners(rpr[type]));
|
||||
te.detail("ReadLowWaiting", self->ssLock.numWaiters(rpr[type]));
|
||||
te.detail("ReadLowActive", self->ssLock->getRunnersCount(rpr[type]));
|
||||
te.detail("ReadLowWaiting", self->ssLock->getWaitersCount(rpr[type]));
|
||||
type = (int)ReadType::NORMAL;
|
||||
te.detail("ReadNormalActive", self->ssLock.numRunners(rpr[type]));
|
||||
te.detail("ReadNormalWaiting", self->ssLock.numWaiters(rpr[type]));
|
||||
te.detail("ReadNormalActive", self->ssLock->getRunnersCount(rpr[type]));
|
||||
te.detail("ReadNormalWaiting", self->ssLock->getWaitersCount(rpr[type]));
|
||||
type = (int)ReadType::HIGH;
|
||||
te.detail("ReadHighActive", self->ssLock.numRunners(rpr[type]));
|
||||
te.detail("ReadHighWaiting", self->ssLock.numWaiters(rpr[type]));
|
||||
te.detail("ReadHighActive", self->ssLock->getRunnersCount(rpr[type]));
|
||||
te.detail("ReadHighWaiting", self->ssLock->getWaitersCount(rpr[type]));
|
||||
StorageBytes sb = self->storage.getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sb.used);
|
||||
te.detail("KvstoreBytesFree", sb.free);
|
||||
|
@ -10821,7 +10851,7 @@ ACTOR Future<Void> storageServerCore(StorageServer* self, StorageServerInterface
|
|||
}
|
||||
self->logCursor = self->logSystem->peekSingle(
|
||||
self->thisServerID, self->version.get() + 1, self->tag, self->history);
|
||||
self->popVersion(self->durableVersion.get() + 1, true);
|
||||
self->popVersion(self->storageMinRecoverVersion + 1, true);
|
||||
}
|
||||
// If update() is waiting for results from the tlog, it might never get them, so needs to be
|
||||
// cancelled. But if it is waiting later, cancelling it could cause problems (e.g. fetchKeys
|
||||
|
@ -11260,7 +11290,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
|||
// If the storage server dies while something that uses self is still on the stack,
|
||||
// we want that actor to complete before we terminate and that memory goes out of scope
|
||||
|
||||
self.ssLock.kill();
|
||||
self.ssLock->kill();
|
||||
|
||||
state Error err = e;
|
||||
if (storageServerTerminated(self, persistentData, err)) {
|
||||
|
@ -11358,7 +11388,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
|||
throw internal_error();
|
||||
} catch (Error& e) {
|
||||
|
||||
self.ssLock.kill();
|
||||
self.ssLock->kill();
|
||||
|
||||
if (self.byteSampleRecovery.isValid()) {
|
||||
self.byteSampleRecovery.cancel();
|
||||
|
|
|
@ -2331,7 +2331,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
|||
recruited.initEndpoints();
|
||||
if (blobMigratorInterf->get().present()) {
|
||||
recruited = blobMigratorInterf->get().get();
|
||||
CODE_PROBE(true, "Recruited while already a blob migrator.");
|
||||
CODE_PROBE(true, "Recruited while already a blob migrator.", probe::decoration::rare);
|
||||
} else {
|
||||
startRole(Role::BLOB_MIGRATOR, recruited.id(), interf.id());
|
||||
DUMPTOKEN(recruited.haltBlobMigrator);
|
||||
|
@ -2796,7 +2796,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
|||
when(state WorkerSnapRequest snapReq = waitNext(interf.workerSnapReq.getFuture())) {
|
||||
std::string snapReqKey = snapReq.snapUID.toString() + snapReq.role.toString();
|
||||
if (snapReqResultMap.count(snapReqKey)) {
|
||||
CODE_PROBE(true, "Worker received a duplicate finished snapshot request");
|
||||
CODE_PROBE(true, "Worker received a duplicate finished snapshot request", probe::decoration::rare);
|
||||
auto result = snapReqResultMap[snapReqKey];
|
||||
result.isError() ? snapReq.reply.sendError(result.getError()) : snapReq.reply.send(result.get());
|
||||
TraceEvent("RetryFinishedWorkerSnapRequest")
|
||||
|
@ -2804,7 +2804,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
|||
.detail("Role", snapReq.role)
|
||||
.detail("Result", result.isError() ? result.getError().code() : success().code());
|
||||
} else if (snapReqMap.count(snapReqKey)) {
|
||||
CODE_PROBE(true, "Worker received a duplicate ongoing snapshot request");
|
||||
CODE_PROBE(true, "Worker received a duplicate ongoing snapshot request", probe::decoration::rare);
|
||||
TraceEvent("RetryOngoingWorkerSnapRequest")
|
||||
.detail("SnapUID", snapReq.snapUID.toString())
|
||||
.detail("Role", snapReq.role);
|
||||
|
|
|
@ -328,6 +328,6 @@ Reference<TransactionWrapper> ApiWorkload::createTransaction() {
|
|||
return transactionFactory->createTransaction();
|
||||
}
|
||||
|
||||
bool ApiWorkload::hasFailed() {
|
||||
bool ApiWorkload::hasFailed() const {
|
||||
return !success;
|
||||
}
|
||||
|
|
|
@ -172,6 +172,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
}
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.emplace("Attrition"); }
|
||||
|
||||
Future<Void> setup(Database const& cx) override { return _setup(cx, this); }
|
||||
|
||||
|
|
|
@ -62,7 +62,9 @@ struct DataLossRecoveryWorkload : TestWorkload {
|
|||
|
||||
Future<Void> setup(Database const& cx) override { return Void(); }
|
||||
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.insert("RandomMoveKeys"); }
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override {
|
||||
out.insert({ "RandomMoveKeys", "Attrition" });
|
||||
}
|
||||
|
||||
Future<Void> start(Database const& cx) override {
|
||||
if (!enabled) {
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
#include "fdbserver/QuietDatabase.h"
|
||||
#include "fdbserver/Status.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
@ -43,7 +42,6 @@ struct DiskFailureInjectionWorkload : FailureInjectionWorkload {
|
|||
double percentBitFlips = 10;
|
||||
double periodicBroadcastInterval = 5.0;
|
||||
std::vector<NetworkAddress> chosenWorkers;
|
||||
std::vector<Future<Void>> clients;
|
||||
// Verification Mode: We run the workload indefinitely in this mode.
|
||||
// The idea is to keep going until we get a non-zero chaosMetric to ensure
|
||||
// that we haven't lost the chaos event. testDuration is ignored in this mode
|
||||
|
@ -76,23 +74,20 @@ struct DiskFailureInjectionWorkload : FailureInjectionWorkload {
|
|||
// 2. Starting the actor that injects failures on chosen storage servers
|
||||
Future<Void> start(Database const& cx) override {
|
||||
if (enabled) {
|
||||
clients.push_back(timeout(diskFailureInjectionClient<WorkerInterface>(cx, this), testDuration, Void()));
|
||||
// In verification mode, we want to wait until periodicEventBroadcast actor returns which indicates that
|
||||
// a non-zero chaosMetric was found.
|
||||
auto result = diskFailureInjectionClient<WorkerInterface>(cx, this);
|
||||
// In verification mode, we want to wait until periodicEventBroadcast actor returns which indicates that
|
||||
// a non-zero chaosMetric was found.
|
||||
if (verificationMode) {
|
||||
clients.push_back(periodicEventBroadcast(this));
|
||||
} else
|
||||
return (periodicEventBroadcast(this) && delay(testDuration)) || result;
|
||||
} else {
|
||||
// Else we honor the testDuration
|
||||
clients.push_back(timeout(periodicEventBroadcast(this), testDuration, Void()));
|
||||
return waitForAll(clients);
|
||||
return timeout(periodicEventBroadcast(this) && result, testDuration, Void());
|
||||
}
|
||||
} else
|
||||
return Void();
|
||||
}
|
||||
|
||||
Future<bool> check(Database const& cx) override {
|
||||
clients.clear();
|
||||
return true;
|
||||
}
|
||||
Future<bool> check(Database const& cx) override { return true; }
|
||||
|
||||
void getMetrics(std::vector<PerfMetric>& m) override {}
|
||||
|
||||
|
@ -160,6 +155,7 @@ struct DiskFailureInjectionWorkload : FailureInjectionWorkload {
|
|||
} catch (Error& e) {
|
||||
// If we failed to get a complete list of storage servers, we can't inject failure events
|
||||
// But don't throw the error in that case
|
||||
TraceEvent("ChaosCouldNotGetStorages").error(e);
|
||||
continue;
|
||||
}
|
||||
auto machine = deterministicRandom()->randomChoice(machines);
|
||||
|
@ -192,11 +188,20 @@ struct DiskFailureInjectionWorkload : FailureInjectionWorkload {
|
|||
for (auto worker : workers) {
|
||||
workersMap[worker.interf.address()] = worker.interf;
|
||||
}
|
||||
TraceEvent("ResendChaos")
|
||||
.detail("ChosenWorkersSize", self->chosenWorkers.size())
|
||||
.detail("FoundWorkers", workersMap.size())
|
||||
.detail(
|
||||
"ResendToNumber",
|
||||
std::count_if(self->chosenWorkers.begin(),
|
||||
self->chosenWorkers.end(),
|
||||
[&map = std::as_const(workersMap)](auto const& addr) { return map.count(addr) > 0; }));
|
||||
for (auto& workerAddress : self->chosenWorkers) {
|
||||
auto itr = workersMap.find(workerAddress);
|
||||
if (itr != workersMap.end()) {
|
||||
if (self->throttleDisk && (throttledWorkers++ < self->workersToThrottle))
|
||||
if (self->throttleDisk && (throttledWorkers++ < self->workersToThrottle)) {
|
||||
self->injectDiskDelays(itr->second, self->stallInterval, self->stallPeriod, self->throttlePeriod);
|
||||
}
|
||||
if (self->corruptFile && (corruptedWorkers++ < self->workersToCorrupt)) {
|
||||
if (g_simulator == g_network)
|
||||
g_simulator->corruptWorkerMap[workerAddress] = true;
|
||||
|
|
|
@ -154,7 +154,7 @@ struct EncryptionOpsWorkload : TestWorkload {
|
|||
|
||||
~EncryptionOpsWorkload() { TraceEvent("EncryptionOpsWorkloadDone").log(); }
|
||||
|
||||
bool isFixedSizePayload() { return mode == 1; }
|
||||
bool isFixedSizePayload() const { return mode == 1; }
|
||||
|
||||
std::string getModeStr() const {
|
||||
if (mode == 1) {
|
||||
|
@ -166,7 +166,7 @@ struct EncryptionOpsWorkload : TestWorkload {
|
|||
throw internal_error();
|
||||
}
|
||||
|
||||
void generateRandomBaseCipher(const int maxLen, uint8_t* buff, int* retLen) {
|
||||
static void generateRandomBaseCipher(const int maxLen, uint8_t* buff, int* retLen) {
|
||||
memset(buff, 0, maxLen);
|
||||
*retLen = deterministicRandom()->randomInt(maxLen / 2, maxLen);
|
||||
deterministicRandom()->randomBytes(buff, *retLen);
|
||||
|
|
|
@ -43,6 +43,12 @@ struct FastTriggeredWatchesWorkload : TestWorkload {
|
|||
keyBytes = std::max(getOption(options, "keyBytes"_sr, 16), 16);
|
||||
}
|
||||
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override {
|
||||
// This test asserts that watches fire within a certain version range. Attrition will make this assertion fail
|
||||
// since it can cause recoveries which will bump the cluster version significantly
|
||||
out.emplace("Attrition");
|
||||
}
|
||||
|
||||
Future<Void> setup(Database const& cx) override {
|
||||
if (clientId == 0)
|
||||
return _setup(cx, this);
|
||||
|
|
|
@ -18,10 +18,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/MutationLogReader.actor.h"
|
||||
#include "fdbclient/Tuple.h"
|
||||
#include "fdbserver/workloads/ApiWorkload.h"
|
||||
|
@ -55,6 +52,9 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
enabled = !clientId; // only do this on the "first" client
|
||||
}
|
||||
|
||||
// TODO: Currently this workload doesn't play well with MachineAttrition, but it probably should
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.insert("Attrition"); }
|
||||
|
||||
Future<Void> start(Database const& cx) override {
|
||||
// This workload is generated differently from a typical ApiWorkload, so don't use ApiWorkload::_start.
|
||||
if (enabled) {
|
||||
|
|
|
@ -228,7 +228,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload {
|
|||
|
||||
// test finish or started but cancelled movement
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
CODE_PROBE(true, "RawMovementApi partial started");
|
||||
CODE_PROBE(true, "RawMovementApi partial started", probe::decoration::rare);
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -320,4 +320,4 @@ struct IDDTxnProcessorApiWorkload : TestWorkload {
|
|||
void getMetrics(std::vector<PerfMetric>& m) override {}
|
||||
};
|
||||
|
||||
WorkloadFactory<IDDTxnProcessorApiWorkload> IDDTxnProcessorApiWorkload;
|
||||
WorkloadFactory<IDDTxnProcessorApiWorkload> IDDTxnProcessorApiWorkload;
|
||||
|
|
|
@ -49,6 +49,8 @@ struct LowLatencyWorkload : TestWorkload {
|
|||
testKey = getOption(options, "testKey"_sr, "testKey"_sr);
|
||||
}
|
||||
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.insert("Attrition"); }
|
||||
|
||||
Future<Void> setup(Database const& cx) override {
|
||||
if (g_network->isSimulated()) {
|
||||
IKnobCollection::getMutableGlobalKnobCollection().setKnob("min_delay_cc_worst_fit_candidacy_seconds",
|
||||
|
|
|
@ -121,6 +121,10 @@ struct MachineAttritionWorkload : FailureInjectionWorkload {
|
|||
// Decides whether this failure-injection workload should be added for the given workload request.
// Returns false whenever the network is simulated and the simulator has extra databases configured.
// Otherwise returns true only if work.useDatabase is set and a random draw falls below
// 1 / (2 + alreadyAdded), so the chance shrinks as alreadyAdded grows
// (presumably random01() is uniform on [0, 1) -- confirm against DeterministicRandom).
bool shouldInject(DeterministicRandom& random,
|
||||
const WorkloadRequest& work,
|
||||
const unsigned alreadyAdded) const override {
|
||||
if (g_network->isSimulated() && !g_simulator->extraDatabases.empty()) {
|
||||
// Remove this as soon as we track extra databases properly
|
||||
return false;
|
||||
}
|
||||
return work.useDatabase && random.random01() < 1.0 / (2.0 + alreadyAdded);
|
||||
}
|
||||
|
||||
|
@ -482,5 +486,4 @@ struct MachineAttritionWorkload : FailureInjectionWorkload {
|
|||
};
|
||||
|
||||
WorkloadFactory<MachineAttritionWorkload> MachineAttritionWorkloadFactory;
|
||||
// TODO: Enable MachineAttritionWorkload injection once this is bug-free
|
||||
// FailureInjectorFactory<MachineAttritionWorkload> MachineAttritionFailureWorkloadFactory;
|
||||
FailureInjectorFactory<MachineAttritionWorkload> MachineAttritionFailureWorkloadFactory;
|
||||
|
|
|
@ -70,7 +70,10 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
|
|||
return _start(this, cx);
|
||||
}
|
||||
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.insert("RandomMoveKeys"); }
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override {
|
||||
out.insert("RandomMoveKeys");
|
||||
out.insert("Attrition");
|
||||
}
|
||||
|
||||
ACTOR Future<Void> _start(PhysicalShardMoveWorkLoad* self, Database cx) {
|
||||
int ignore = wait(setDDMode(cx, 0));
|
||||
|
|
|
@ -1143,6 +1143,8 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
|||
state KeyRange coordinators_key_range =
|
||||
KeyRangeRef("process/"_sr, "process0"_sr)
|
||||
.withPrefix(SpecialKeySpace::getManagementApiCommandPrefix("coordinators"));
|
||||
state unsigned retries = 0;
|
||||
state bool changeCoordinatorsSucceeded = true;
|
||||
loop {
|
||||
try {
|
||||
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
|
@ -1222,11 +1224,18 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
|||
.detail("ErrorMessage", valueObj["message"].get_str());
|
||||
ASSERT(valueObj["command"].get_str() == "coordinators");
|
||||
if (valueObj["retriable"].get_bool()) { // coordinators not reachable, retry
|
||||
if (++retries >= 10) {
|
||||
CODE_PROBE(true, "ChangeCoordinators Exceeded retry limit");
|
||||
changeCoordinatorsSucceeded = false;
|
||||
tx->reset();
|
||||
break;
|
||||
}
|
||||
tx->reset();
|
||||
} else {
|
||||
ASSERT(valueObj["message"].get_str() ==
|
||||
"No change (existing configuration satisfies request)");
|
||||
tx->reset();
|
||||
CODE_PROBE(true, "Successfully changed coordinators");
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -1242,8 +1251,10 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
|||
ASSERT(res.present()); // Otherwise, database is in a bad state
|
||||
ClusterConnectionString csNew(res.get().toString());
|
||||
// verify the cluster decription
|
||||
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
|
||||
ASSERT(csNew.hostnames.size() + csNew.coords.size() == old_coordinators_processes.size() + 1);
|
||||
ASSERT(!changeCoordinatorsSucceeded ||
|
||||
new_cluster_description == csNew.clusterKeyName().toString());
|
||||
ASSERT(!changeCoordinatorsSucceeded ||
|
||||
csNew.hostnames.size() + csNew.coords.size() == old_coordinators_processes.size() + 1);
|
||||
std::vector<NetworkAddress> newCoordinators = wait(csNew.tryResolveHostnames());
|
||||
// verify the coordinators' addresses
|
||||
for (const auto& network_address : newCoordinators) {
|
||||
|
@ -1259,7 +1270,7 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
|||
wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY));
|
||||
}
|
||||
// change back to original settings
|
||||
loop {
|
||||
while (changeCoordinatorsSucceeded) {
|
||||
try {
|
||||
std::string new_processes_key;
|
||||
tx->setOption(FDBTransactionOptions::RAW_ACCESS);
|
||||
|
|
|
@ -92,11 +92,15 @@ struct StorageQuotaWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
// Check that writes to both the tenants are rejected when the group is over quota.
|
||||
state bool rejected1 = wait(tryWrite(self, cx, self->tenant, /*expectOk=*/false));
|
||||
state bool rejected1 = wait(tryWrite(self, cx, self->tenant, /*bypassQuota=*/false, /*expectOk=*/false));
|
||||
ASSERT(rejected1);
|
||||
state bool rejected2 = wait(tryWrite(self, cx, self->emptyTenant, /*expectOk=*/false));
|
||||
state bool rejected2 = wait(tryWrite(self, cx, self->emptyTenant, /*bypassQuota=*/false, /*expectOk=*/false));
|
||||
ASSERT(rejected2);
|
||||
|
||||
// Check that transaction is able to commit if we use the FDBTransactionOptions to bypass quota.
|
||||
state bool bypassed = wait(tryWrite(self, cx, self->tenant, /*bypassQuota=*/true, /*expectOk=*/true));
|
||||
ASSERT(bypassed);
|
||||
|
||||
// Increase the quota or clear the quota. Check that writes to both the tenants are now able to commit.
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
quota = size * 2;
|
||||
|
@ -104,9 +108,9 @@ struct StorageQuotaWorkload : TestWorkload {
|
|||
} else {
|
||||
wait(clearStorageQuotaHelper(cx, self->group));
|
||||
}
|
||||
state bool committed1 = wait(tryWrite(self, cx, self->tenant, /*expectOk=*/true));
|
||||
state bool committed1 = wait(tryWrite(self, cx, self->tenant, /*bypassQuota=*/false, /*expectOk=*/true));
|
||||
ASSERT(committed1);
|
||||
state bool committed2 = wait(tryWrite(self, cx, self->emptyTenant, /*expectOk=*/true));
|
||||
state bool committed2 = wait(tryWrite(self, cx, self->emptyTenant, /*bypassQuota=*/false, /*expectOk=*/true));
|
||||
ASSERT(committed2);
|
||||
|
||||
return Void();
|
||||
|
@ -173,13 +177,20 @@ struct StorageQuotaWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<bool> tryWrite(StorageQuotaWorkload* self, Database cx, TenantName tenant, bool expectOk) {
|
||||
ACTOR static Future<bool> tryWrite(StorageQuotaWorkload* self,
|
||||
Database cx,
|
||||
TenantName tenant,
|
||||
bool bypassQuota,
|
||||
bool expectOk) {
|
||||
state int i;
|
||||
// Retry the transaction a few times if needed; this allows us to wait for a while for all
|
||||
// the storage usage and quota related monitors to fetch and propagate the latest information
|
||||
// about the tenants that are over storage quota.
|
||||
for (i = 0; i < 10; i++) {
|
||||
state Transaction tr(cx, tenant);
|
||||
if (bypassQuota) {
|
||||
tr.setOption(FDBTransactionOptions::BYPASS_STORAGE_QUOTA);
|
||||
}
|
||||
loop {
|
||||
try {
|
||||
Standalone<KeyValueRef> kv =
|
||||
|
|
|
@ -66,7 +66,10 @@ struct SSCheckpointRestoreWorkload : TestWorkload {
|
|||
return _start(this, cx);
|
||||
}
|
||||
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.insert("RandomMoveKeys"); }
|
||||
void disableFailureInjectionWorkloads(std::set<std::string>& out) const override {
|
||||
out.insert("RandomMoveKeys");
|
||||
out.insert("Attrition");
|
||||
}
|
||||
|
||||
ACTOR Future<Void> _start(SSCheckpointRestoreWorkload* self, Database cx) {
|
||||
state Key key = "TestKey"_sr;
|
||||
|
|
|
@ -26,8 +26,8 @@
|
|||
// This workload sets the throughput quota of a tag during the setup phase
|
||||
class ThroughputQuotaWorkload : public TestWorkload {
|
||||
TransactionTag transactionTag;
|
||||
double reservedQuota{ 0.0 };
|
||||
double totalQuota{ 0.0 };
|
||||
int64_t reservedQuota{ 0 };
|
||||
int64_t totalQuota{ 0 };
|
||||
|
||||
ACTOR static Future<Void> setup(ThroughputQuotaWorkload* self, Database cx) {
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
|
||||
|
|
|
@ -209,6 +209,7 @@ const std::set<int> transactionRetryableErrors = { error_code_not_committed,
|
|||
error_code_batch_transaction_throttled,
|
||||
error_code_tag_throttled,
|
||||
error_code_unknown_tenant,
|
||||
error_code_proxy_tag_throttled,
|
||||
// maybe committed error
|
||||
error_code_cluster_version_changed,
|
||||
error_code_commit_unknown_result };
|
||||
|
|
|
@ -341,7 +341,7 @@ public:
|
|||
BindPromise(BindPromise const& r) : p(r.p), errContext(r.errContext), errID(r.errID) {}
|
||||
BindPromise(BindPromise&& r) noexcept : p(std::move(r.p)), errContext(r.errContext), errID(r.errID) {}
|
||||
|
||||
Future<Void> getFuture() { return p.getFuture(); }
|
||||
Future<Void> getFuture() const { return p.getFuture(); }
|
||||
|
||||
void operator()(const boost::system::error_code& error, size_t bytesWritten = 0) {
|
||||
try {
|
||||
|
|
|
@ -88,7 +88,7 @@ public:
|
|||
sendError(broken_promise());
|
||||
}
|
||||
|
||||
Future<T> getFuture() { // Call only on the originating thread!
|
||||
Future<T> getFuture() const { // Call only on the originating thread!
|
||||
return promise.getFuture();
|
||||
}
|
||||
|
||||
|
@ -107,7 +107,7 @@ public:
|
|||
g_network->isOnMainThread() ? incrementPriorityIfEven(g_network->getCurrentTask())
|
||||
: TaskPriority::DefaultOnMainThread);
|
||||
}
|
||||
bool isValid() { return promise.isValid(); }
|
||||
bool isValid() const { return promise.isValid(); }
|
||||
|
||||
private:
|
||||
Promise<T> promise;
|
||||
|
|
|
@ -29,21 +29,25 @@
|
|||
#define PRIORITYMULTILOCK_ACTOR_H
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include <boost/intrusive/list.hpp>
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
#define PRIORITYMULTILOCK_DEBUG 0
|
||||
|
||||
#if PRIORITYMULTILOCK_DEBUG || !defined(NO_INTELLISENSE)
|
||||
#define pml_debug_printf(...) \
|
||||
if (now() > 0) \
|
||||
printf(__VA_ARGS__)
|
||||
if (now() > 0) { \
|
||||
printf("pml line=%04d ", __LINE__); \
|
||||
printf(__VA_ARGS__); \
|
||||
}
|
||||
#else
|
||||
#define pml_debug_printf(...)
|
||||
#endif
|
||||
|
||||
// A multi user lock with a concurrent holder limit where waiters request a lock with a priority
|
||||
// id and are granted locks based on a total concurrency and relative weights of the current active
|
||||
// priorities. Priority id's must start at 0 and are sequential integers.
|
||||
// priorities. Priority id's must start at 0 and are sequential integers. Priority id numbers
|
||||
// are not related to the importance of the priority in execution.
|
||||
//
|
||||
// Scheduling logic
|
||||
// Let
|
||||
|
@ -64,17 +68,17 @@
|
|||
// The interface is similar to FlowMutex except that lock holders can just drop the lock to release it.
|
||||
//
|
||||
// Usage:
|
||||
// Lock lock = wait(prioritylock.lock(priorityLevel));
|
||||
// Lock lock = wait(prioritylock.lock(priority_id));
|
||||
// lock.release(); // Explicit release, or
|
||||
// // let lock and all copies of lock go out of scope to release
|
||||
class PriorityMultiLock {
|
||||
|
||||
class PriorityMultiLock : public ReferenceCounted<PriorityMultiLock> {
|
||||
public:
|
||||
// Waiting on the lock returns a Lock, which is really just a Promise<Void>
|
||||
// Calling release() is not necessary, it exists in case the Lock holder wants to explicitly release
|
||||
// the Lock before it goes out of scope.
|
||||
struct Lock {
|
||||
void release() { promise.send(Void()); }
|
||||
bool isLocked() const { return promise.canBeSet(); }
|
||||
|
||||
// This is exposed in case the caller wants to use/copy it directly
|
||||
Promise<Void> promise;
|
||||
|
@ -84,10 +88,11 @@ public:
|
|||
: PriorityMultiLock(concurrency, parseStringToVector<int>(weights, ',')) {}
|
||||
|
||||
PriorityMultiLock(int concurrency, std::vector<int> weightsByPriority)
|
||||
: concurrency(concurrency), available(concurrency), waiting(0), totalPendingWeights(0), releaseDebugID(0) {
|
||||
: concurrency(concurrency), available(concurrency), waiting(0), totalPendingWeights(0) {
|
||||
|
||||
priorities.resize(weightsByPriority.size());
|
||||
for (int i = 0; i < priorities.size(); ++i) {
|
||||
priorities[i].priority = i;
|
||||
priorities[i].weight = weightsByPriority[i];
|
||||
}
|
||||
|
||||
|
@ -102,7 +107,8 @@ public:
|
|||
|
||||
// If this priority currently has no waiters
|
||||
if (q.empty()) {
|
||||
// Add this priority's weight to the total for priorities with pending work
|
||||
// Add this priority's weight to the total for priorities with pending work. This must be done
|
||||
// so that currentCapacity() below will assign capacity to this priority.
|
||||
totalPendingWeights += p.weight;
|
||||
|
||||
// If there are slots available and the priority has capacity then don't make the caller wait
|
||||
|
@ -114,80 +120,71 @@ public:
|
|||
Lock lock;
|
||||
addRunner(lock, &p);
|
||||
|
||||
pml_debug_printf("lock nowait line %d priority %d %s\n", __LINE__, priority, toString().c_str());
|
||||
pml_debug_printf("lock nowait priority %d %s\n", priority, toString().c_str());
|
||||
return lock;
|
||||
}
|
||||
|
||||
// If we didn't return above then add the priority to the waitingPriorities list
|
||||
waitingPriorities.push_back(p);
|
||||
}
|
||||
|
||||
Waiter w;
|
||||
q.push_back(w);
|
||||
Waiter& w = q.emplace_back();
|
||||
++waiting;
|
||||
|
||||
pml_debug_printf("lock wait line %d priority %d %s\n", __LINE__, priority, toString().c_str());
|
||||
pml_debug_printf("lock wait priority %d %s\n", priority, toString().c_str());
|
||||
return w.lockPromise.getFuture();
|
||||
}
|
||||
|
||||
void kill() {
|
||||
pml_debug_printf("kill %s\n", toString().c_str());
|
||||
brokenOnDestruct.reset();
|
||||
|
||||
// handleRelease will not free up any execution slots when it ends via cancel
|
||||
fRunner.cancel();
|
||||
available = 0;
|
||||
runners.clear();
|
||||
priorities.clear();
|
||||
|
||||
waitingPriorities.clear();
|
||||
for (auto& p : priorities) {
|
||||
p.queue.clear();
|
||||
}
|
||||
}
|
||||
|
||||
std::string toString() const {
|
||||
int runnersDone = 0;
|
||||
for (int i = 0; i < runners.size(); ++i) {
|
||||
if (runners[i].isReady()) {
|
||||
++runnersDone;
|
||||
}
|
||||
}
|
||||
|
||||
std::string s = format("{ ptr=%p concurrency=%d available=%d running=%d waiting=%d runnersQueue=%d "
|
||||
"runnersDone=%d pendingWeights=%d ",
|
||||
std::string s = format("{ ptr=%p concurrency=%d available=%d running=%d waiting=%d "
|
||||
"pendingWeights=%d ",
|
||||
this,
|
||||
concurrency,
|
||||
available,
|
||||
concurrency - available,
|
||||
waiting,
|
||||
runners.size(),
|
||||
runnersDone,
|
||||
totalPendingWeights);
|
||||
|
||||
for (int i = 0; i < priorities.size(); ++i) {
|
||||
s += format("p%d:{%s} ", i, priorities[i].toString(this).c_str());
|
||||
for (auto& p : priorities) {
|
||||
s += format("{%s} ", p.toString(this).c_str());
|
||||
}
|
||||
|
||||
s += "}";
|
||||
|
||||
if (concurrency - available != runners.size() - runnersDone) {
|
||||
pml_debug_printf("%s\n", s.c_str());
|
||||
ASSERT_EQ(concurrency - available, runners.size() - runnersDone);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
int maxPriority() const { return priorities.size() - 1; }
|
||||
|
||||
int totalWaiters() const { return waiting; }
|
||||
int getRunnersCount() const { return concurrency - available; }
|
||||
int getWaitersCount() const { return waiting; }
|
||||
|
||||
int numWaiters(const unsigned int priority) const {
|
||||
int getWaitersCount(const unsigned int priority) const {
|
||||
ASSERT(priority < priorities.size());
|
||||
return priorities[priority].queue.size();
|
||||
}
|
||||
|
||||
int totalRunners() const { return concurrency - available; }
|
||||
|
||||
int numRunners(const unsigned int priority) const {
|
||||
int getRunnersCount(const unsigned int priority) const {
|
||||
ASSERT(priority < priorities.size());
|
||||
return priorities[priority].runners;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Waiter {
|
||||
Waiter() {}
|
||||
Promise<Lock> lockPromise;
|
||||
};
|
||||
|
||||
|
@ -202,8 +199,8 @@ private:
|
|||
|
||||
typedef Deque<Waiter> Queue;
|
||||
|
||||
struct Priority {
|
||||
Priority() : runners(0), weight(0) {}
|
||||
struct Priority : boost::intrusive::list_base_hook<> {
|
||||
Priority() : runners(0), weight(0), priority(-1) {}
|
||||
|
||||
// Queue of waiters at this priority
|
||||
Queue queue;
|
||||
|
@ -211,9 +208,12 @@ private:
|
|||
int runners;
|
||||
// Configured weight for this priority
|
||||
int weight;
|
||||
// Priority number for convenience, matches *this's index in PML priorities vector
|
||||
int priority;
|
||||
|
||||
std::string toString(const PriorityMultiLock* pml) const {
|
||||
return format("weight=%d run=%d wait=%d cap=%d",
|
||||
return format("priority=%d weight=%d run=%d wait=%d cap=%d",
|
||||
priority,
|
||||
weight,
|
||||
runners,
|
||||
queue.size(),
|
||||
|
@ -222,51 +222,41 @@ private:
|
|||
};
|
||||
|
||||
std::vector<Priority> priorities;
|
||||
typedef boost::intrusive::list<Priority, boost::intrusive::constant_time_size<false>> WaitingPrioritiesList;
|
||||
|
||||
// Current or recent (ended) runners
|
||||
Deque<Future<Void>> runners;
|
||||
// List of all priorities with 1 or more waiters. This list exists so that the scheduling loop
|
||||
// does not have to iterate over the priorities vector checking priorities without waiters.
|
||||
WaitingPrioritiesList waitingPriorities;
|
||||
|
||||
Future<Void> fRunner;
|
||||
AsyncTrigger wakeRunner;
|
||||
Promise<Void> brokenOnDestruct;
|
||||
|
||||
// Used for debugging, can roll over without issue
|
||||
unsigned int releaseDebugID;
|
||||
|
||||
ACTOR static Future<Void> handleRelease(PriorityMultiLock* self, Future<Void> f, Priority* priority) {
|
||||
state [[maybe_unused]] unsigned int id = self->releaseDebugID++;
|
||||
|
||||
pml_debug_printf("%f handleRelease self=%p id=%u start \n", now(), self, id);
|
||||
ACTOR static void handleRelease(Reference<PriorityMultiLock> self, Priority* priority, Future<Void> holder) {
|
||||
pml_debug_printf("%f handleRelease self=%p start\n", now(), self.getPtr());
|
||||
try {
|
||||
wait(f);
|
||||
pml_debug_printf("%f handleRelease self=%p id=%u success\n", now(), self, id);
|
||||
wait(holder);
|
||||
pml_debug_printf("%f handleRelease self=%p success\n", now(), self.getPtr());
|
||||
} catch (Error& e) {
|
||||
pml_debug_printf("%f handleRelease self=%p id=%u error %s\n", now(), self, id, e.what());
|
||||
if (e.code() == error_code_actor_cancelled) {
|
||||
throw;
|
||||
}
|
||||
pml_debug_printf("%f handleRelease self=%p error %s\n", now(), self.getPtr(), e.what());
|
||||
}
|
||||
|
||||
pml_debug_printf("lock release line %d priority %d %s\n",
|
||||
__LINE__,
|
||||
(int)(priority - &self->priorities.front()),
|
||||
self->toString().c_str());
|
||||
pml_debug_printf("lock release priority %d %s\n", (int)(priority->priority), self->toString().c_str());
|
||||
|
||||
pml_debug_printf("%f handleRelease self=%p id=%u releasing\n", now(), self, id);
|
||||
pml_debug_printf("%f handleRelease self=%p releasing\n", now(), self.getPtr());
|
||||
++self->available;
|
||||
priority->runners -= 1;
|
||||
|
||||
// If there are any waiters or if the runners array is getting large, trigger the runner loop
|
||||
if (self->waiting > 0 || self->runners.size() > 1000) {
|
||||
if (self->waiting > 0) {
|
||||
self->wakeRunner.trigger();
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
void addRunner(Lock& lock, Priority* p) {
|
||||
p->runners += 1;
|
||||
void addRunner(Lock& lock, Priority* priority) {
|
||||
priority->runners += 1;
|
||||
--available;
|
||||
runners.push_back(handleRelease(this, lock.promise.getFuture(), p));
|
||||
handleRelease(Reference<PriorityMultiLock>::addRef(this), priority, lock.promise.getFuture());
|
||||
}
|
||||
|
||||
// Current maximum running tasks for the specified priority, which must have waiters
|
||||
|
@ -278,76 +268,50 @@ private:
|
|||
}
|
||||
|
||||
ACTOR static Future<Void> runner(PriorityMultiLock* self) {
|
||||
state int sinceYield = 0;
|
||||
state Future<Void> error = self->brokenOnDestruct.getFuture();
|
||||
|
||||
// Priority to try to run tasks from next
|
||||
state int priority = 0;
|
||||
state WaitingPrioritiesList::iterator p = self->waitingPriorities.end();
|
||||
|
||||
loop {
|
||||
pml_debug_printf(
|
||||
"runner loop start line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
|
||||
|
||||
// Cleanup finished runner futures at the front of the runner queue.
|
||||
while (!self->runners.empty() && self->runners.front().isReady()) {
|
||||
self->runners.pop_front();
|
||||
}
|
||||
pml_debug_printf("runner loop start priority=%d %s\n", p->priority, self->toString().c_str());
|
||||
|
||||
// Wait for a runner to release its lock
|
||||
pml_debug_printf(
|
||||
"runner loop waitTrigger line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
|
||||
pml_debug_printf("runner loop waitTrigger priority=%d %s\n", p->priority, self->toString().c_str());
|
||||
wait(self->wakeRunner.onTrigger());
|
||||
pml_debug_printf(
|
||||
"%f runner loop wake line %d priority=%d %s\n", now(), __LINE__, priority, self->toString().c_str());
|
||||
|
||||
if (++sinceYield == 100) {
|
||||
sinceYield = 0;
|
||||
pml_debug_printf(
|
||||
" runner waitDelay line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
|
||||
wait(delay(0));
|
||||
pml_debug_printf(
|
||||
" runner afterDelay line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
|
||||
}
|
||||
pml_debug_printf("%f runner loop wake priority=%d %s\n", now(), p->priority, self->toString().c_str());
|
||||
|
||||
// While there are available slots and there are waiters, launch tasks
|
||||
while (self->available > 0 && self->waiting > 0) {
|
||||
pml_debug_printf(
|
||||
" launch loop start line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
|
||||
|
||||
Priority* pPriority;
|
||||
pml_debug_printf(" launch loop start priority=%d %s\n", p->priority, self->toString().c_str());
|
||||
|
||||
// Find the next priority with waiters and capacity. There must be at least one.
|
||||
loop {
|
||||
// Rotate to next priority
|
||||
if (++priority == self->priorities.size()) {
|
||||
priority = 0;
|
||||
if (p == self->waitingPriorities.end()) {
|
||||
p = self->waitingPriorities.begin();
|
||||
}
|
||||
|
||||
pPriority = &self->priorities[priority];
|
||||
pml_debug_printf(" launch loop scan priority=%d %s\n", p->priority, self->toString().c_str());
|
||||
|
||||
pml_debug_printf(" launch loop scan line %d priority=%d %s\n",
|
||||
__LINE__,
|
||||
priority,
|
||||
self->toString().c_str());
|
||||
|
||||
if (!pPriority->queue.empty() && pPriority->runners < self->currentCapacity(pPriority->weight)) {
|
||||
if (!p->queue.empty() && p->runners < self->currentCapacity(p->weight)) {
|
||||
break;
|
||||
}
|
||||
++p;
|
||||
}
|
||||
|
||||
Queue& queue = pPriority->queue;
|
||||
|
||||
Queue& queue = p->queue;
|
||||
Waiter w = queue.front();
|
||||
queue.pop_front();
|
||||
|
||||
// If this priority is now empty, subtract its weight from the total pending weights
|
||||
// If this priority is now empty, subtract its weight from the total pending weights and remove it
|
||||
// from the waitingPriorities list
|
||||
Priority* pPriority = &*p;
|
||||
if (queue.empty()) {
|
||||
p = self->waitingPriorities.erase(p);
|
||||
self->totalPendingWeights -= pPriority->weight;
|
||||
|
||||
pml_debug_printf(" emptied priority line %d priority=%d %s\n",
|
||||
__LINE__,
|
||||
priority,
|
||||
self->toString().c_str());
|
||||
pml_debug_printf(
|
||||
" emptied priority priority=%d %s\n", pPriority->priority, self->toString().c_str());
|
||||
}
|
||||
|
||||
--self->waiting;
|
||||
|
@ -365,10 +329,9 @@ private:
|
|||
self->addRunner(lock, pPriority);
|
||||
}
|
||||
|
||||
pml_debug_printf(" launched line %d alreadyDone=%d priority=%d %s\n",
|
||||
__LINE__,
|
||||
pml_debug_printf(" launched alreadyDone=%d priority=%d %s\n",
|
||||
!lock.promise.canBeSet(),
|
||||
priority,
|
||||
pPriority->priority,
|
||||
self->toString().c_str());
|
||||
}
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue