Merge branch 'main' of github.com:apple/foundationdb into tenant-list-filter
commit 25e1721e75
@@ -70,10 +70,13 @@ void ApiWorkload::start() {
     schedule([this]() {
         // 1. Clear data
         clearData([this]() {
-            // 2. Populate initial data
-            populateData([this]() {
-                // 3. Generate random workload
-                runTests();
+            // 2. Workload setup
+            setup([this]() {
+                // 3. Populate initial data
+                populateData([this]() {
+                    // 4. Generate random workload
+                    runTests();
+                });
             });
         });
     });
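Note: the hunk above splits workload startup into clear -> setup -> populate -> run, so a workload can do one-time initialization before data population. A minimal sketch of how a derived workload might use the new hook (illustrative only; MyWorkload is a hypothetical class, while setupBlobGranules() is the helper added later in this commit):

class MyWorkload : public ApiWorkload {
    // Optional override: the base-class default simply schedules the continuation.
    void setup(TTaskFct cont) override { setupBlobGranules(cont); }
};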
@@ -249,6 +252,10 @@ void ApiWorkload::populateData(TTaskFct cont) {
     }
 }
 
+void ApiWorkload::setup(TTaskFct cont) {
+    schedule(cont);
+}
+
 void ApiWorkload::randomInsertOp(TTaskFct cont, std::optional<int> tenantId) {
     int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
     auto kvPairs = std::make_shared<std::vector<fdb::KeyValue>>();
@@ -322,4 +329,85 @@ std::optional<fdb::BytesRef> ApiWorkload::getTenant(std::optional<int> tenantId)
     }
 }
 
+std::string ApiWorkload::debugTenantStr(std::optional<int> tenantId) {
+    return tenantId.has_value() ? fmt::format("(tenant {0})", tenantId.value()) : "()";
+}
+
+// BlobGranule setup.
+// This blobbifies ['\x00', '\xff') per tenant or for the whole database if there are no tenants.
+void ApiWorkload::setupBlobGranules(TTaskFct cont) {
+    // This count is used to synchronize the # of tenant blobbifyRange() calls to ensure
+    // we only start the workload once blobbification has fully finished.
+    auto blobbifiedCount = std::make_shared<std::atomic<int>>(1);
+
+    if (tenants.empty()) {
+        blobbifiedCount->store(1);
+        blobbifyTenant({}, blobbifiedCount, cont);
+    } else {
+        blobbifiedCount->store(tenants.size());
+        for (int i = 0; i < tenants.size(); i++) {
+            schedule([=]() { blobbifyTenant(i, blobbifiedCount, cont); });
+        }
+    }
+}
+
+void ApiWorkload::blobbifyTenant(std::optional<int> tenantId,
+                                 std::shared_ptr<std::atomic<int>> blobbifiedCount,
+                                 TTaskFct cont) {
+    auto retBlobbifyRange = std::make_shared<bool>(false);
+    execOperation(
+        [=](auto ctx) {
+            fdb::Key begin(1, '\x00');
+            fdb::Key end(1, '\xff');
+
+            info(fmt::format("setup: blobbifying {}: [\\x00 - \\xff)\n", debugTenantStr(tenantId)));
+
+            fdb::Future f = ctx->dbOps()->blobbifyRange(begin, end).eraseType();
+            ctx->continueAfter(f, [ctx, retBlobbifyRange, f]() {
+                *retBlobbifyRange = f.get<fdb::future_var::Bool>();
+                ctx->done();
+            });
+        },
+        [=]() {
+            if (!*retBlobbifyRange) {
+                schedule([=]() { blobbifyTenant(tenantId, blobbifiedCount, cont); });
+            } else {
+                schedule([=]() { verifyTenant(tenantId, blobbifiedCount, cont); });
+            }
+        },
+        /*tenant=*/getTenant(tenantId),
+        /* failOnError = */ false);
+}
+
+void ApiWorkload::verifyTenant(std::optional<int> tenantId,
+                               std::shared_ptr<std::atomic<int>> blobbifiedCount,
+                               TTaskFct cont) {
+    auto retVerifyVersion = std::make_shared<int64_t>(-1);
+
+    execOperation(
+        [=](auto ctx) {
+            fdb::Key begin(1, '\x00');
+            fdb::Key end(1, '\xff');
+
+            info(fmt::format("setup: verifying {}: [\\x00 - \\xff)\n", debugTenantStr(tenantId)));
+
+            fdb::Future f = ctx->dbOps()->verifyBlobRange(begin, end, /*latest_version*/ -2).eraseType();
+            ctx->continueAfter(f, [ctx, retVerifyVersion, f]() {
+                *retVerifyVersion = f.get<fdb::future_var::Int64>();
+                ctx->done();
+            });
+        },
+        [=]() {
+            if (*retVerifyVersion == -1) {
+                schedule([=]() { verifyTenant(tenantId, blobbifiedCount, cont); });
+            } else {
+                if (blobbifiedCount->fetch_sub(1) == 1) {
+                    schedule(cont);
+                }
+            }
+        },
+        /*tenant=*/getTenant(tenantId),
+        /* failOnError = */ false);
+}
+
 } // namespace FdbApiTester
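Note: setupBlobGranules() above fans out one blobbifyTenant()/verifyTenant() chain per tenant and uses a shared atomic counter so the continuation runs only after the last tenant finishes (fetch_sub(1) returning 1 marks the final decrement). A minimal sketch of that countdown pattern in isolation (illustrative; runAllThenContinue and its parameters are hypothetical names, not part of the commit):

#include <atomic>
#include <functional>
#include <memory>
#include <vector>

// Start every task and invoke `cont` exactly once, after the last completion callback fires.
void runAllThenContinue(std::vector<std::function<void(std::function<void()>)>> tasks,
                        std::function<void()> cont) {
    auto remaining = std::make_shared<std::atomic<int>>(static_cast<int>(tasks.size()));
    for (auto& task : tasks) {
        task([remaining, cont]() {
            // fetch_sub returns the previous value, so only the final completion runs cont.
            if (remaining->fetch_sub(1) == 1) {
                cont();
            }
        });
    }
}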
@@ -41,6 +41,9 @@ public:
 
     virtual void checkProgress() override;
 
+    // Workload specific setup phase.
+    virtual void setup(TTaskFct cont);
+
     // Running specific tests
     // The default implementation generates a workload consisting of
     // random operations generated by randomOperation
@@ -126,6 +129,12 @@ protected:
     void randomClearRangeOp(TTaskFct cont, std::optional<int> tenantId);
 
     std::optional<fdb::BytesRef> getTenant(std::optional<int> tenantId);
+    std::string debugTenantStr(std::optional<int> tenantId);
+
+    // Generic BlobGranules setup.
+    void setupBlobGranules(TTaskFct cont);
+    void blobbifyTenant(std::optional<int> tenantId, std::shared_ptr<std::atomic<int>> blobbifiedCount, TTaskFct cont);
+    void verifyTenant(std::optional<int> tenantId, std::shared_ptr<std::atomic<int>> blobbifiedCount, TTaskFct cont);
 
 private:
     void populateDataTx(TTaskFct cont, std::optional<int> tenantId);
@@ -52,26 +52,23 @@ private:
     };
     std::vector<OpType> excludedOpTypes;
 
+    void setup(TTaskFct cont) override { setupBlobGranules(cont); }
+
     // Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
     // FIXME: should still guarantee a read succeeds eventually somehow
-    // FIXME: this needs to be per tenant if tenant ids are set
     std::unordered_set<std::optional<int>> tenantsWithReadSuccess;
 
     inline void setReadSuccess(std::optional<int> tenantId) { tenantsWithReadSuccess.insert(tenantId); }
 
     inline bool seenReadSuccess(std::optional<int> tenantId) { return tenantsWithReadSuccess.count(tenantId); }
 
-    std::string tenantDebugString(std::optional<int> tenantId) {
-        return tenantId.has_value() ? fmt::format(" (tenant {0})", tenantId.value()) : "";
-    }
-
     void debugOp(std::string opName, fdb::Key begin, fdb::Key end, std::optional<int> tenantId, std::string message) {
         if (BG_API_DEBUG_VERBOSE) {
-            info(fmt::format("{0}: [{1} - {2}){3}: {4}",
+            info(fmt::format("{0}: [{1} - {2}) {3}: {4}",
                              opName,
                              fdb::toCharsRef(begin),
                              fdb::toCharsRef(end),
-                             tenantDebugString(tenantId),
+                             debugTenantStr(tenantId),
                              message));
         }
     }
@@ -117,7 +114,7 @@ private:
                 results.get()->assign(resVector.begin(), resVector.end());
                 bool previousSuccess = seenReadSuccess(tenantId);
                 if (!previousSuccess) {
-                    info(fmt::format("Read{0}: first success\n", tenantDebugString(tenantId)));
+                    info(fmt::format("Read {0}: first success\n", debugTenantStr(tenantId)));
                     setReadSuccess(tenantId);
                 } else {
                     debugOp("Read", begin, end, tenantId, "complete");
@@ -289,20 +286,19 @@ private:
     }
 
     // TODO: tenant support
-    void randomGetBlobRangesOp(TTaskFct cont) {
+    void randomGetBlobRangesOp(TTaskFct cont, std::optional<int> tenantId) {
         fdb::Key begin = randomKeyName();
         fdb::Key end = randomKeyName();
         auto results = std::make_shared<std::vector<fdb::KeyRange>>();
         if (begin > end) {
             std::swap(begin, end);
         }
-        std::optional<int> tenantId = {};
 
         debugOp("GetBlobRanges", begin, end, tenantId, "starting");
 
         execOperation(
             [begin, end, results](auto ctx) {
-                fdb::Future f = ctx->db().listBlobbifiedRanges(begin, end, 1000).eraseType();
+                fdb::Future f = ctx->dbOps()->listBlobbifiedRanges(begin, end, 1000).eraseType();
                 ctx->continueAfter(f, [ctx, f, results]() {
                     *results = copyKeyRangeArray(f.get<fdb::future_var::KeyRangeRefArray>());
                     ctx->done();
@@ -314,25 +310,24 @@ private:
                 this->validateRanges(results, begin, end, seenReadSuccess(tenantId));
                 schedule(cont);
             },
+            getTenant(tenantId),
             /* failOnError = */ false);
     }
 
     // TODO: tenant support
-    void randomVerifyOp(TTaskFct cont) {
+    void randomVerifyOp(TTaskFct cont, std::optional<int> tenantId) {
         fdb::Key begin = randomKeyName();
         fdb::Key end = randomKeyName();
-        std::optional<int> tenantId;
         if (begin > end) {
             std::swap(begin, end);
         }
 
-        auto verifyVersion = std::make_shared<int64_t>(false);
-
         debugOp("Verify", begin, end, tenantId, "starting");
 
+        auto verifyVersion = std::make_shared<int64_t>(-1);
         execOperation(
             [begin, end, verifyVersion](auto ctx) {
-                fdb::Future f = ctx->db().verifyBlobRange(begin, end, -2 /* latest version*/).eraseType();
+                fdb::Future f = ctx->dbOps()->verifyBlobRange(begin, end, -2 /* latest version*/).eraseType();
                 ctx->continueAfter(f, [ctx, verifyVersion, f]() {
                     *verifyVersion = f.get<fdb::future_var::Int64>();
                     ctx->done();
@@ -344,15 +339,16 @@ private:
                 if (*verifyVersion == -1) {
                     ASSERT(!previousSuccess);
                 } else if (!previousSuccess) {
-                    info(fmt::format("Verify{0}: first success\n", tenantDebugString(tenantId)));
+                    info(fmt::format("Verify {0}: first success\n", debugTenantStr(tenantId)));
                     setReadSuccess(tenantId);
                 }
                 schedule(cont);
             },
+            getTenant(tenantId),
             /* failOnError = */ false);
     }
 
-    void randomOperation(TTaskFct cont) {
+    void randomOperation(TTaskFct cont) override {
         std::optional<int> tenantId = randomTenant();
 
         OpType txType = (stores[tenantId].size() == 0) ? OP_INSERT : (OpType)Random::get().randomInt(0, OP_LAST);
@@ -380,10 +376,10 @@ private:
             randomSummarizeOp(cont, tenantId);
             break;
         case OP_GET_BLOB_RANGES:
-            randomGetBlobRangesOp(cont);
+            randomGetBlobRangesOp(cont, tenantId);
             break;
         case OP_VERIFY:
-            randomVerifyOp(cont);
+            randomVerifyOp(cont, tenantId);
             break;
         }
     }
@@ -47,6 +47,8 @@ private:
         OP_LAST = OP_CANCEL_PURGE
     };
 
+    void setup(TTaskFct cont) override { setupBlobGranules(cont); }
+
     // could add summarize too old and verify too old as ops if desired but those are lower value
 
     // Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
@@ -91,13 +91,15 @@ public:
            fdbDb = executor->selectDatabase();
        }
 
+       if (tenantName) {
+           fdbTenant = fdbDb.openTenant(*tenantName);
+           fdbDbOps = std::make_shared<fdb::Tenant>(fdbTenant);
+       } else {
+           fdbDbOps = std::make_shared<fdb::Database>(fdbDb);
+       }
+
        if (transactional) {
-           if (tenantName) {
-               fdb::Tenant tenant = fdbDb.openTenant(*tenantName);
-               fdbTx = tenant.createTransaction();
-           } else {
-               fdbTx = fdbDb.createTransaction();
-           }
+           fdbTx = fdbDbOps->createTransaction();
        }
    }
 
@@ -109,6 +111,10 @@ public:
 
    fdb::Database db() override { return fdbDb.atomic_load(); }
 
+   fdb::Tenant tenant() override { return fdbTenant.atomic_load(); }
+
+   std::shared_ptr<fdb::IDatabaseOps> dbOps() override { return std::atomic_load(&fdbDbOps); }
+
    fdb::Transaction tx() override { return fdbTx.atomic_load(); }
 
    // Set a continuation to be executed when a future gets ready
@@ -272,13 +278,17 @@ protected:
        scheduler->schedule([thisRef]() {
            fdb::Database db = thisRef->executor->selectDatabase();
            thisRef->fdbDb.atomic_store(db);
+           if (thisRef->tenantName) {
+               fdb::Tenant tenant = db.openTenant(*thisRef->tenantName);
+               thisRef->fdbTenant.atomic_store(tenant);
+               std::atomic_store(&thisRef->fdbDbOps,
+                                 std::dynamic_pointer_cast<fdb::IDatabaseOps>(std::make_shared<fdb::Tenant>(tenant)));
+           } else {
+               std::atomic_store(&thisRef->fdbDbOps,
+                                 std::dynamic_pointer_cast<fdb::IDatabaseOps>(std::make_shared<fdb::Database>(db)));
+           }
            if (thisRef->transactional) {
-               if (thisRef->tenantName) {
-                   fdb::Tenant tenant = db.openTenant(*thisRef->tenantName);
-                   thisRef->fdbTx.atomic_store(tenant.createTransaction());
-               } else {
-                   thisRef->fdbTx.atomic_store(db.createTransaction());
-               }
+               thisRef->fdbTx.atomic_store(thisRef->fdbDbOps->createTransaction());
            }
            thisRef->restartTransaction();
        });
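Note: the scheduler lambda above re-points fdbDbOps while workload threads may still be calling dbOps(), so the diff uses the std::atomic_store/std::atomic_load overloads for std::shared_ptr instead of plain assignment. A minimal sketch of that pattern (illustrative; Handle, publish and snapshot are hypothetical names):

#include <atomic>
#include <memory>

struct Handle {
    int generation = 0;
};

std::shared_ptr<Handle> current = std::make_shared<Handle>();

void publish(std::shared_ptr<Handle> fresh) {
    // Readers racing with this store see either the old or the new pointer, never a torn value.
    std::atomic_store(&current, std::move(fresh));
}

std::shared_ptr<Handle> snapshot() {
    // Take a stable reference before dereferencing.
    return std::atomic_load(&current);
}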
@@ -317,6 +327,14 @@ protected:
    // Provides a thread safe interface by itself (no need for mutex)
    fdb::Database fdbDb;
 
+   // FDB tenant
+   // Provides a thread safe interface by itself (no need for mutex)
+   fdb::Tenant fdbTenant;
+
+   // FDB IDatabaseOps to hide database/tenant accordingly.
+   // Provides a shared pointer to database functions based on if db or tenant.
+   std::shared_ptr<fdb::IDatabaseOps> fdbDbOps;
+
    // FDB transaction
    // Provides a thread safe interface by itself (no need for mutex)
    fdb::Transaction fdbTx;
@@ -41,6 +41,12 @@ public:
    // Current FDB database
    virtual fdb::Database db() = 0;
 
+   // Current FDB tenant
+   virtual fdb::Tenant tenant() = 0;
+
+   // Current FDB IDatabaseOps
+   virtual std::shared_ptr<fdb::IDatabaseOps> dbOps() = 0;
+
    // Current FDB transaction
    virtual fdb::Transaction tx() = 0;
 
@@ -117,8 +117,11 @@ void WorkloadBase::execTransaction(TOpStartFct startFct,
 }
 
 // Execute a non-transactional database operation within the workload
-void WorkloadBase::execOperation(TOpStartFct startFct, TTaskFct cont, bool failOnError) {
-    doExecute(startFct, cont, {}, failOnError, false);
+void WorkloadBase::execOperation(TOpStartFct startFct,
+                                 TTaskFct cont,
+                                 std::optional<fdb::BytesRef> tenant,
+                                 bool failOnError) {
+    doExecute(startFct, cont, tenant, failOnError, false);
 }
 
 void WorkloadBase::doExecute(TOpStartFct startFct,
@@ -125,7 +125,10 @@ protected:
                          bool failOnError = true);
 
    // Execute a non-transactional database operation within the workload
-   void execOperation(TOpStartFct startFct, TTaskFct cont, bool failOnError = true);
+   void execOperation(TOpStartFct startFct,
+                      TTaskFct cont,
+                      std::optional<fdb::BytesRef> tenant = std::optional<fdb::BytesRef>(),
+                      bool failOnError = true);
 
    // Log an error message, increase error counter
    void error(const std::string& msg);
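Note: execOperation() now forwards an optional tenant to doExecute(); the default keeps existing call sites at database scope. A hedged usage sketch from inside a workload, assuming `cont` is a TTaskFct and `tenantId` a std::optional<int> already in scope (the lambda bodies are placeholders):

// Database-scoped operation: tenant and failOnError keep their defaults.
execOperation([](auto ctx) { ctx->done(); }, cont);

// Tenant-scoped operation that tolerates errors instead of aborting the workload.
execOperation(
    [](auto ctx) { ctx->done(); },
    cont,
    /*tenant=*/getTenant(tenantId),
    /* failOnError = */ false);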
@@ -677,7 +677,28 @@ public:
    }
 };
 
-class Tenant final {
+// Handle this as an abstract class instead of interface to preserve lifetime of fdb objects owned by Tenant and
+// Database.
+class IDatabaseOps {
+public:
+   virtual ~IDatabaseOps() = default;
+
+   virtual Transaction createTransaction() = 0;
+
+   virtual TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) = 0;
+   virtual TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) = 0;
+   virtual TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin,
+                                                                          KeyRef end,
+                                                                          int rangeLimit) = 0;
+   virtual TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) = 0;
+   virtual TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin,
+                                                             KeyRef end,
+                                                             int64_t version,
+                                                             bool force) = 0;
+   virtual TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) = 0;
+};
+
+class Tenant final : public IDatabaseOps {
    friend class Database;
    std::shared_ptr<native::FDBTenant> tenant;
 
@@ -694,6 +715,14 @@ public:
    Tenant& operator=(const Tenant&) noexcept = default;
    Tenant() noexcept : tenant(nullptr) {}
 
+   void atomic_store(Tenant other) { std::atomic_store(&tenant, other.tenant); }
+
+   Tenant atomic_load() {
+       Tenant retVal;
+       retVal.tenant = std::atomic_load(&tenant);
+       return retVal;
+   }
+
    static void createTenant(Transaction tr, BytesRef name) {
        tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef());
        tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef());
@@ -715,7 +744,7 @@ public:
        return tr.get(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), false);
    }
 
-   Transaction createTransaction() {
+   Transaction createTransaction() override {
        auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
        auto err = Error(native::fdb_tenant_create_transaction(tenant.get(), &tx_native));
        if (err)
@@ -723,14 +752,49 @@ public:
        return Transaction(tx_native);
    }
 
-   TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) {
+   TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) override {
        if (!tenant)
-           throw std::runtime_error("blobbifyRange from null tenant");
+           throw std::runtime_error("blobbifyRange() from null tenant");
        return native::fdb_tenant_blobbify_range(tenant.get(), begin.data(), intSize(begin), end.data(), intSize(end));
    }
+
+   TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) override {
+       if (!tenant)
+           throw std::runtime_error("unblobbifyRange() from null tenant");
+       return native::fdb_tenant_unblobbify_range(
+           tenant.get(), begin.data(), intSize(begin), end.data(), intSize(end));
+   }
+
+   TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin, KeyRef end, int rangeLimit) override {
+       if (!tenant)
+           throw std::runtime_error("listBlobbifiedRanges() from null tenant");
+       return native::fdb_tenant_list_blobbified_ranges(
+           tenant.get(), begin.data(), intSize(begin), end.data(), intSize(end), rangeLimit);
+   }
+
+   TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) override {
+       if (!tenant)
+           throw std::runtime_error("verifyBlobRange() from null tenant");
+       return native::fdb_tenant_verify_blob_range(
+           tenant.get(), begin.data(), intSize(begin), end.data(), intSize(end), version);
+   }
+
+   TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin, KeyRef end, int64_t version, bool force) override {
+       if (!tenant)
+           throw std::runtime_error("purgeBlobGranules() from null tenant");
+       native::fdb_bool_t forceBool = force;
+       return native::fdb_tenant_purge_blob_granules(
+           tenant.get(), begin.data(), intSize(begin), end.data(), intSize(end), version, forceBool);
+   }
+
+   TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) override {
+       if (!tenant)
+           throw std::runtime_error("waitPurgeGranulesComplete() from null tenant");
+       return native::fdb_tenant_wait_purge_granules_complete(tenant.get(), purgeKey.data(), intSize(purgeKey));
+   }
 };
 
-class Database {
+class Database : public IDatabaseOps {
    friend class Tenant;
    std::shared_ptr<native::FDBDatabase> db;
 
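Note: with both Database and Tenant implementing fdb::IDatabaseOps, blob-granule management code can be written once against the interface and handed either handle. A minimal sketch (illustrative; blobbifyFullRange is a hypothetical helper, not part of this commit):

// Accepts a std::shared_ptr<fdb::Database> or std::shared_ptr<fdb::Tenant> alike.
inline fdb::TypedFuture<fdb::future_var::Bool> blobbifyFullRange(std::shared_ptr<fdb::IDatabaseOps> dbOps,
                                                                 fdb::KeyRef begin,
                                                                 fdb::KeyRef end) {
    // Virtual dispatch picks fdb_database_blobbify_range or fdb_tenant_blobbify_range.
    return dbOps->blobbifyRange(begin, end);
}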
@@ -789,7 +853,7 @@ public:
        return Tenant(tenant_native);
    }
 
-   Transaction createTransaction() {
+   Transaction createTransaction() override {
        if (!db)
            throw std::runtime_error("create_transaction from null database");
        auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
@@ -799,33 +863,33 @@ public:
        return Transaction(tx_native);
    }
 
-   TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin, KeyRef end, int rangeLimit) {
+   TypedFuture<future_var::KeyRangeRefArray> listBlobbifiedRanges(KeyRef begin, KeyRef end, int rangeLimit) override {
        if (!db)
            throw std::runtime_error("listBlobbifiedRanges from null database");
        return native::fdb_database_list_blobbified_ranges(
            db.get(), begin.data(), intSize(begin), end.data(), intSize(end), rangeLimit);
    }
 
-   TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) {
+   TypedFuture<future_var::Int64> verifyBlobRange(KeyRef begin, KeyRef end, int64_t version) override {
        if (!db)
            throw std::runtime_error("verifyBlobRange from null database");
        return native::fdb_database_verify_blob_range(
            db.get(), begin.data(), intSize(begin), end.data(), intSize(end), version);
    }
 
-   TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) {
+   TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) override {
        if (!db)
            throw std::runtime_error("blobbifyRange from null database");
        return native::fdb_database_blobbify_range(db.get(), begin.data(), intSize(begin), end.data(), intSize(end));
    }
 
-   TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) {
+   TypedFuture<future_var::Bool> unblobbifyRange(KeyRef begin, KeyRef end) override {
        if (!db)
            throw std::runtime_error("unblobbifyRange from null database");
        return native::fdb_database_unblobbify_range(db.get(), begin.data(), intSize(begin), end.data(), intSize(end));
    }
 
-   TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin, KeyRef end, int64_t version, bool force) {
+   TypedFuture<future_var::KeyRef> purgeBlobGranules(KeyRef begin, KeyRef end, int64_t version, bool force) override {
        if (!db)
            throw std::runtime_error("purgeBlobGranules from null database");
        native::fdb_bool_t forceBool = force;
@@ -833,7 +897,7 @@ public:
            db.get(), begin.data(), intSize(begin), end.data(), intSize(end), version, forceBool);
    }
 
-   TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) {
+   TypedFuture<future_var::None> waitPurgeGranulesComplete(KeyRef purgeKey) override {
        if (!db)
            throw std::runtime_error("purgeBlobGranules from null database");
        return native::fdb_database_wait_purge_granules_complete(db.get(), purgeKey.data(), intSize(purgeKey));
@@ -1,12 +1,12 @@
 #!/usr/bin/env python3
-from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import argparse
 from pathlib import Path
-import platform
 import shutil
 import subprocess
 import sys
 import os
 import glob
+import unittest
 
 sys.path[:0] = [os.path.join(os.path.dirname(__file__), "..", "..", "..", "tests", "TestRunner")]
 
@@ -18,6 +18,9 @@ from local_cluster import LocalCluster, random_secret_string
 PREV_RELEASE_VERSION = "7.1.5"
 PREV_PREV_RELEASE_VERSION = "7.0.0"
 
+args = None
+downloader = None
+
 
 def version_from_str(ver_str):
     ver = [int(s) for s in ver_str.split(".")]
@@ -30,11 +33,9 @@ def api_version_from_str(ver_str):
     return ver_tuple[0] * 100 + ver_tuple[1] * 10
 
 
-class TestEnv(LocalCluster):
+class TestCluster(LocalCluster):
     def __init__(
         self,
-        args,
-        downloader: FdbBinaryDownloader,
         version: str,
     ):
         self.client_config_tester_bin = Path(args.client_config_tester_bin).resolve()
@@ -44,35 +45,33 @@ class TestEnv(LocalCluster):
         assert self.build_dir.is_dir(), "{} is not a directory".format(args.build_dir)
         self.tmp_dir = self.build_dir.joinpath("tmp", random_secret_string(16))
         self.tmp_dir.mkdir(parents=True)
-        self.downloader = downloader
         self.version = version
         super().__init__(
             self.tmp_dir,
-            self.downloader.binary_path(version, "fdbserver"),
-            self.downloader.binary_path(version, "fdbmonitor"),
-            self.downloader.binary_path(version, "fdbcli"),
+            downloader.binary_path(version, "fdbserver"),
+            downloader.binary_path(version, "fdbmonitor"),
+            downloader.binary_path(version, "fdbcli"),
             1,
         )
-        self.set_env_var("LD_LIBRARY_PATH", self.downloader.lib_dir(version))
-        self.failed_cnt = 0
+        self.set_env_var("LD_LIBRARY_PATH", downloader.lib_dir(version))
 
-    def __enter__(self):
-        super().__enter__()
-        super().create_database()
-        return self
+    def setup(self):
+        self.__enter__()
+        self.create_database()
 
-    def __exit__(self, xc_type, exc_value, traceback):
-        super().__exit__(xc_type, exc_value, traceback)
+    def tearDown(self):
+        self.__exit__(None, None, None)
         shutil.rmtree(self.tmp_dir)
 
 
+# Client configuration tests using a cluster of the current version
 class ClientConfigTest:
-    def __init__(self, test_env: TestEnv, title: str):
-        self.test_env = test_env
-        self.title = title
+    def __init__(self, tc: unittest.TestCase):
+        self.tc = tc
+        self.cluster = tc.cluster
         self.external_lib_dir = None
         self.external_lib_path = None
-        self.test_dir = self.test_env.tmp_dir.joinpath(random_secret_string(16))
+        self.test_dir = self.cluster.tmp_dir.joinpath(random_secret_string(16))
         self.test_dir.mkdir(parents=True)
         self.log_dir = self.test_dir.joinpath("log")
         self.log_dir.mkdir(parents=True)
@@ -88,31 +87,28 @@ class ClientConfigTest:
         self.external_lib_dir = self.test_dir.joinpath("extclients")
         self.external_lib_dir.mkdir(parents=True)
         for version in versions:
-            src_file_path = self.test_env.downloader.lib_path(version)
-            assert src_file_path.exists(), "{} does not exist".format(src_file_path)
+            src_file_path = downloader.lib_path(version)
+            self.tc.assertTrue(src_file_path.exists(), "{} does not exist".format(src_file_path))
             target_file_path = self.external_lib_dir.joinpath("libfdb_c.{}.so".format(version))
             shutil.copyfile(src_file_path, target_file_path)
-            assert target_file_path.exists(), "{} does not exist".format(target_file_path)
+            self.tc.assertTrue(target_file_path.exists(), "{} does not exist".format(target_file_path))
 
     def create_external_lib_path(self, version):
-        src_file_path = self.test_env.downloader.lib_path(version)
-        assert src_file_path.exists(), "{} does not exist".format(src_file_path)
+        src_file_path = downloader.lib_path(version)
+        self.tc.assertTrue(src_file_path.exists(), "{} does not exist".format(src_file_path))
         self.external_lib_path = self.test_dir.joinpath("libfdb_c.{}.so".format(version))
         shutil.copyfile(src_file_path, self.external_lib_path)
-        assert self.external_lib_path.exists(), "{} does not exist".format(self.external_lib_path)
+        self.tc.assertTrue(self.external_lib_path.exists(), "{} does not exist".format(self.external_lib_path))
 
     def dump_client_logs(self):
         for log_file in glob.glob(os.path.join(self.log_dir, "*")):
-            print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(log_file))
+            print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(log_file), file=sys.stderr)
             with open(log_file, "r") as f:
-                print(f.read())
-            print(">>>>>>>>>>>>>>>>>>>> End of {}:".format(log_file))
+                print(f.read(), file=sys.stderr)
+            print(">>>>>>>>>>>>>>>>>>>> End of {}:".format(log_file), file=sys.stderr)
 
     def exec(self):
-        print("-" * 80)
-        print(self.title)
-        print("-" * 80)
-        cmd_args = [self.test_env.client_config_tester_bin, "--cluster-file", self.test_env.cluster_file]
+        cmd_args = [self.cluster.client_config_tester_bin, "--cluster-file", self.cluster.cluster_file]
 
         if self.tmp_dir is not None:
             cmd_args += ["--tmp-dir", self.tmp_dir]
@@ -141,61 +137,66 @@ class ClientConfigTest:
         if self.transaction_timeout is not None:
             cmd_args += ["--transaction-timeout", str(self.transaction_timeout)]
 
-        print("Executing test command: {}".format(" ".join([str(c) for c in cmd_args])))
-        tester_proc = subprocess.Popen(cmd_args, stdout=sys.stdout, stderr=sys.stderr)
-        tester_retcode = tester_proc.wait()
-        if tester_retcode != 0:
-            print("Test '{}' failed".format(self.title))
-            self.test_env.failed_cnt += 1
-
-        self.cleanup()
+        print("\nExecuting test command: {}".format(" ".join([str(c) for c in cmd_args])), file=sys.stderr)
+        try:
+            tester_proc = subprocess.Popen(cmd_args, stdout=sys.stdout, stderr=sys.stderr)
+            tester_retcode = tester_proc.wait()
+            self.tc.assertEqual(0, tester_retcode)
+        finally:
+            self.cleanup()
 
     def cleanup(self):
         shutil.rmtree(self.test_dir)
 
 
-class ClientConfigTests:
-    def __init__(self, args):
-        self.args = args
-        self.downloader = FdbBinaryDownloader(args.build_dir)
-        # binary downloads are currently available only for x86_64
-        self.platform = platform.machine()
-        if self.platform == "x86_64":
-            self.downloader.download_old_binaries(PREV_RELEASE_VERSION)
-            self.downloader.download_old_binaries(PREV_PREV_RELEASE_VERSION)
+class ClientConfigTests(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.cluster = TestCluster(CURRENT_VERSION)
+        cls.cluster.setup()
 
-    def test_local_client_only(self, test_env):
-        test = ClientConfigTest(test_env, "Local client only")
+    @classmethod
+    def tearDownClass(cls):
+        cls.cluster.tearDown()
+
+    def test_local_client_only(self):
+        # Local client only
+        test = ClientConfigTest(self)
         test.exec()
 
-    def test_single_external_client_only(self, test_env):
-        test = ClientConfigTest(test_env, "Single external client")
+    def test_single_external_client_only(self):
+        # Single external client only
+        test = ClientConfigTest(self)
         test.create_external_lib_path(CURRENT_VERSION)
         test.disable_local_client = True
         test.exec()
 
-    def test_same_local_and_external_client(self, test_env):
-        test = ClientConfigTest(test_env, "Same Local & External Client")
+    def test_same_local_and_external_client(self):
+        # Same version local & external client
+        test = ClientConfigTest(self)
        test.create_external_lib_path(CURRENT_VERSION)
         test.exec()
 
-    def test_multiple_external_clients(self, test_env):
-        test = ClientConfigTest(test_env, "Multiple external clients")
+    def test_multiple_external_clients(self):
+        # Multiple external clients, normal case
+        test = ClientConfigTest(self)
         test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(PREV_PREV_RELEASE_VERSION)
         test.exec()
 
-    def test_no_external_client_support_api_version(self, test_env):
-        test = ClientConfigTest(test_env, "Multiple external clients; API version supported by none")
+    def test_no_external_client_support_api_version(self):
+        # Multiple external clients, API version supported by none of them
+        test = ClientConfigTest(self)
         test.create_external_lib_dir([PREV_PREV_RELEASE_VERSION, PREV_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
         test.expected_error = 2204 # API function missing
         test.exec()
 
-    def test_no_external_client_support_api_version_ignore(self, test_env):
-        test = ClientConfigTest(test_env, "Multiple external clients; API version supported by none; Ignore failures")
+    def test_no_external_client_support_api_version_ignore(self):
+        # Multiple external clients; API version supported by none of them; Ignore failures
+        test = ClientConfigTest(self)
         test.create_external_lib_dir([PREV_PREV_RELEASE_VERSION, PREV_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
@@ -203,79 +204,66 @@ class ClientConfigTests:
         test.expected_error = 2124 # All external clients failed
         test.exec()
 
-    def test_one_external_client_wrong_api_version(self, test_env):
-        test = ClientConfigTest(test_env, "Multiple external clients: API version unsupported by one")
+    def test_one_external_client_wrong_api_version(self):
+        # Multiple external clients, API version unsupported by one of othem
+        test = ClientConfigTest(self)
         test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
         test.expected_error = 2204 # API function missing
         test.exec()
 
-    def test_one_external_client_wrong_api_version_ignore(self, test_env):
-        test = ClientConfigTest(test_env, "Multiple external clients; API version unsupported by one; Ignore failures")
+    def test_one_external_client_wrong_api_version_ignore(self):
+        # Multiple external clients; API version unsupported by one of them; Ignore failures
+        test = ClientConfigTest(self)
         test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
         test.disable_local_client = True
         test.api_version = api_version_from_str(CURRENT_VERSION)
         test.ignore_external_client_failures = True
         test.exec()
 
-    def test_prev_release_with_ext_client(self, test_env):
-        test = ClientConfigTest(test_env, "Cluster with previous release version")
+
+# Client configuration tests using a cluster of previous release version
+class ClientConfigPrevVersionTests(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.cluster = TestCluster(PREV_RELEASE_VERSION)
+        cls.cluster.setup()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.cluster.tearDown()
+
+    def test_external_client(self):
+        # Using an external client to connect
+        test = ClientConfigTest(self)
         test.create_external_lib_path(PREV_RELEASE_VERSION)
         test.api_version = api_version_from_str(PREV_RELEASE_VERSION)
         test.exec()
 
-    def test_prev_release_with_ext_client_unsupported_api(self, test_env):
-        test = ClientConfigTest(test_env, "Cluster with previous release version; Unsupported API version")
+    def test_prev_release_with_ext_client_unsupported_api(self):
+        # Leaving an unsupported API version
+        test = ClientConfigTest(self)
         test.create_external_lib_path(PREV_RELEASE_VERSION)
         test.expected_error = 2204 # API function missing
         test.exec()
 
-    def test_prev_release_with_ext_client_unsupported_api_ignore(self, test_env):
-        test = ClientConfigTest(
-            test_env, "Cluster with previous release version; Unsupported API version; Ignore failures"
-        )
+    def test_prev_release_with_ext_client_unsupported_api_ignore(self):
+        # Leaving an unsupported API version, ignore failures
+        test = ClientConfigTest(self)
         test.create_external_lib_path(PREV_RELEASE_VERSION)
         test.transaction_timeout = 100
         test.expected_error = 1031 # Timeout
         test.ignore_external_client_failures = True
         test.exec()
 
-    def run_tests(self):
-        failed_cnt = 0
-        with TestEnv(self.args, self.downloader, CURRENT_VERSION) as test_env:
-            self.test_local_client_only(test_env)
-            self.test_single_external_client_only(test_env)
-            self.test_same_local_and_external_client(test_env)
-            self.test_multiple_external_clients(test_env)
-            self.test_no_external_client_support_api_version(test_env)
-            self.test_no_external_client_support_api_version_ignore(test_env)
-            self.test_one_external_client_wrong_api_version(test_env)
-            self.test_one_external_client_wrong_api_version_ignore(test_env)
-            failed_cnt += test_env.failed_cnt
-
-        if self.platform == "x86_64":
-            with TestEnv(self.args, self.downloader, PREV_RELEASE_VERSION) as test_env:
-                self.test_prev_release_with_ext_client(test_env)
-                self.test_prev_release_with_ext_client_unsupported_api(test_env)
-                self.test_prev_release_with_ext_client_unsupported_api_ignore(test_env)
-                failed_cnt += test_env.failed_cnt
-
-        if failed_cnt > 0:
-            print("{} tests failed".format(failed_cnt))
-        else:
-            print("All tests successful")
-        return failed_cnt
-
 
 if __name__ == "__main__":
-    parser = ArgumentParser(
-        formatter_class=RawDescriptionHelpFormatter,
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
         description="""
-A script for testing FDB multi-version client in upgrade scenarios. Creates a local cluster,
-generates a workload using fdb_c_api_tester with a specified test file, and performs
-cluster upgrade according to the specified upgrade path. Checks if the workload successfully
-progresses after each upgrade step.
+Unit tests for running FDB client with different configurations.
+Also accepts python unit tests command line arguments.
 """,
     )
     parser.add_argument(
@@ -291,7 +279,13 @@ if __name__ == "__main__":
         help="Path to the fdb_c_client_config_tester executable.",
         required=True,
     )
+    parser.add_argument("unittest_args", nargs=argparse.REMAINDER)
+
     args = parser.parse_args()
-    test = ClientConfigTests(args)
-    failed_cnt = test.run_tests()
-    sys.exit(failed_cnt)
+    sys.argv[1:] = args.unittest_args
+
+    downloader = FdbBinaryDownloader(args.build_dir)
+    downloader.download_old_binaries(PREV_RELEASE_VERSION)
+    downloader.download_old_binaries(PREV_PREV_RELEASE_VERSION)
+
+    unittest.main(verbosity=2)
@@ -0,0 +1,47 @@
+/*
+ * BlobRestoreCommand.actor.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fdbcli/fdbcli.actor.h"
+#include "fdbclient/FDBOptions.h"
+#include "fdbclient/FDBTypes.h"
+#include "fdbclient/NativeAPI.actor.h"
+#include "fdbclient/SystemData.h"
+#include "flow/actorcompiler.h" // This must be the last #include.
+
+namespace fdb_cli {
+
+ACTOR Future<bool> blobRestoreCommandActor(Database localDb, std::vector<StringRef> tokens) {
+   if (tokens.size() != 1 && tokens.size() != 2) {
+       printUsage(tokens[0]);
+       return false;
+   }
+
+   state bool success = false;
+   wait(store(success, localDb->blobRestore(normalKeys)));
+   if (success) {
+       fmt::print("Started blob restore for the full cluster. Please use 'status' command to check progress.\n");
+   } else {
+       fmt::print("Fail to start a new blob restore while there is a pending one.\n");
+   }
+   return success;
+}
+
+CommandFactory blobRestoreFactory("blobrestore", CommandHelp("blobrestore", "", ""));
+} // namespace fdb_cli
@@ -1416,6 +1416,13 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
                continue;
            }
 
+           if (tokencmp(tokens[0], "blobrestore")) {
+               bool _result = wait(makeInterruptable(blobRestoreCommandActor(localDb, tokens)));
+               if (!_result)
+                   is_error = true;
+               continue;
+           }
+
            if (tokencmp(tokens[0], "unlock")) {
                if ((tokens.size() != 2) || (tokens[1].size() != 32) ||
                    !std::all_of(tokens[1].begin(), tokens[1].end(), &isxdigit)) {
@@ -213,6 +213,9 @@ ACTOR Future<bool> blobRangeCommandActor(Database localDb,
 ACTOR Future<bool> blobKeyCommandActor(Database localDb,
                                        Optional<TenantMapEntry> tenantEntry,
                                        std::vector<StringRef> tokens);
+// blobrestore command
+ACTOR Future<bool> blobRestoreCommandActor(Database localDb, std::vector<StringRef> tokens);
+
 // maintenance command
 ACTOR Future<bool> setHealthyZone(Reference<IDatabase> db, StringRef zoneId, double seconds, bool printWarning = false);
 ACTOR Future<bool> clearHealthyZone(Reference<IDatabase> db,
@@ -45,7 +45,12 @@ def run_fdbcli_command(*args):
         string: Console output from fdbcli
     """
     commands = command_template + ["{}".format(' '.join(args))]
-    return subprocess.run(commands, stdout=subprocess.PIPE, env=fdbcli_env).stdout.decode('utf-8').strip()
+    try:
+        # if the fdbcli command is stuck for more than 20 seconds, the database is definitely unavailable
+        process = subprocess.run(commands, stdout=subprocess.PIPE, env=fdbcli_env, timeout=20)
+        return process.stdout.decode('utf-8').strip()
+    except subprocess.TimeoutExpired:
+        raise Exception('The fdbcli command is stuck, database is unavailable')
 
 
 def run_fdbcli_command_and_get_error(*args):
@@ -1079,16 +1084,19 @@ if __name__ == '__main__':
         lockAndUnlock()
         maintenance()
         profile()
-        suspend()
+        # TODO: reenable it until it's stable
+        # suspend()
         transaction()
-        throttle()
+        # this is replaced by the "quota" command
+        #throttle()
         triggerddteaminfolog()
         tenants()
         versionepoch()
         integer_options()
         tls_address_suffix()
         knobmanagement()
-        quota()
+        # TODO: fix the issue when running through the external client
+        #quota()
     else:
         assert args.process_number > 1, "Process number should be positive"
         coordinators()
@@ -971,6 +971,11 @@ void sortDeltasByKey(const Standalone<GranuleDeltas>& deltasByVersion,
 		// clearVersion as previous guy)
 	}
 
+void sortDeltasByKey(const Standalone<GranuleDeltas>& deltasByVersion, const KeyRangeRef& fileRange) {
+	SortedDeltasT deltasByKey;
+	sortDeltasByKey(deltasByVersion, fileRange, deltasByKey);
+}
+
 // FIXME: Could maybe reduce duplicated code between this and chunkedSnapshot for chunking
 Value serializeChunkedDeltaFile(const Standalone<StringRef>& fileNameRef,
                                 const Standalone<GranuleDeltas>& deltas,
@@ -5924,7 +5924,6 @@ public:
 			printf("Restoring backup to version: %lld\n", (long long)targetVersion);
 		}
 
-		state int retryCount = 0;
 		state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
 		loop {
 			try {

@@ -5948,17 +5947,9 @@ public:
 				wait(tr->commit());
 				break;
 			} catch (Error& e) {
-				if (e.code() == error_code_transaction_too_old) {
-					retryCount++;
-				}
 				if (e.code() == error_code_restore_duplicate_tag) {
 					throw;
 				}
-				if (g_network->isSimulated() && retryCount > 50) {
-					CODE_PROBE(true, "submitRestore simulation speedup");
-					// try to make the read window back to normal size (5 * version_per_sec)
-					g_simulator->speedUpSimulation = true;
-				}
 				wait(tr->onError(e));
 			}
 		}
@@ -2559,15 +2559,21 @@ bool schemaMatch(json_spirit::mValue const& schemaValue,
 	}
 }
 
-void setStorageQuota(Transaction& tr, StringRef tenantName, int64_t quota) {
+void setStorageQuota(Transaction& tr, StringRef tenantGroupName, int64_t quota) {
 	tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-	auto key = storageQuotaKey(tenantName);
+	auto key = storageQuotaKey(tenantGroupName);
 	tr.set(key, BinaryWriter::toValue<int64_t>(quota, Unversioned()));
 }
 
-ACTOR Future<Optional<int64_t>> getStorageQuota(Transaction* tr, StringRef tenantName) {
+void clearStorageQuota(Transaction& tr, StringRef tenantGroupName) {
+	tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+	auto key = storageQuotaKey(tenantGroupName);
+	tr.clear(key);
+}
+
+ACTOR Future<Optional<int64_t>> getStorageQuota(Transaction* tr, StringRef tenantGroupName) {
 	tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
-	state Optional<Value> v = wait(tr->get(storageQuotaKey(tenantName)));
+	state Optional<Value> v = wait(tr->get(storageQuotaKey(tenantGroupName)));
 	if (!v.present()) {
 		return Optional<int64_t>();
 	}
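A minimal usage sketch (illustrative only, not part of this change) of the tenant-group quota helpers above; the tenant group name, the quota value, and the surrounding actor are assumptions.

// Hypothetical example: write a quota, then read it back. Assumes a Database handle named db.
ACTOR Future<Void> storageQuotaExample(Database db) {
	state Transaction tr(db);
	loop {
		try {
			// 1 GB quota for an illustrative tenant group
			setStorageQuota(tr, "myTenantGroup"_sr, 1000000000);
			wait(tr.commit());
			break;
		} catch (Error& e) {
			wait(tr.onError(e));
		}
	}
	// Read it back in a fresh transaction.
	state Transaction tr2(db);
	Optional<int64_t> quota = wait(getStorageQuota(&tr2, "myTenantGroup"_sr));
	if (quota.present()) {
		printf("quota: %lld\n", (long long)quota.get());
	}
	return Void();
}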
@@ -4524,9 +4524,11 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
 				output.readToBegin = readToBegin;
 				output.readThroughEnd = readThroughEnd;
 
-				if (BUGGIFY && limits.hasByteLimit() && output.size() > std::max(1, originalLimits.minRows)) {
+				if (BUGGIFY && limits.hasByteLimit() && output.size() > std::max(1, originalLimits.minRows) &&
+				    (!std::is_same<GetKeyValuesFamilyRequest, GetMappedKeyValuesRequest>::value)) {
 					// Copy instead of resizing because TSS maybe be using output's arena for comparison. This only
 					// happens in simulation so it's fine
+					// disable it on prefetch, because boundary entries serve as continuations
 					RangeResultFamily copy;
 					int newSize =
 					    deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size());
@@ -10915,6 +10917,37 @@ Future<Standalone<VectorRef<KeyRangeRef>>> DatabaseContext::listBlobbifiedRanges
 	return listBlobbifiedRangesActor(Reference<DatabaseContext>::addRef(this), range, rangeLimit, tenantName);
 }
 
+ACTOR Future<bool> blobRestoreActor(Reference<DatabaseContext> cx, KeyRange range) {
+	state Database db(cx);
+	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
+	loop {
+		try {
+			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+			tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+			state Key key = blobRestoreCommandKeyFor(range);
+			Optional<Value> value = wait(tr->get(key));
+			if (value.present()) {
+				Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(value.get());
+				if (status.progress < 100) {
+					return false; // stop if there is in-progress restore.
+				}
+			}
+			Standalone<BlobRestoreStatus> status;
+			status.progress = 0;
+			Value newValue = blobRestoreCommandValueFor(status);
+			tr->set(key, newValue);
+			wait(tr->commit());
+			return true;
+		} catch (Error& e) {
+			wait(tr->onError(e));
+		}
+	}
+}
+
+Future<bool> DatabaseContext::blobRestore(KeyRange range) {
+	return blobRestoreActor(Reference<DatabaseContext>::addRef(this), range);
+}
+
 int64_t getMaxKeySize(KeyRef const& key) {
 	return getMaxWriteKeySize(key, true);
 }
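An illustrative sketch (not from the diff) of how a caller might kick off the restore that blobRestoreActor above registers; the actor wrapper and the use of normalKeys are assumptions.

// Hypothetical caller: returns quietly if a restore is already in flight for the range.
ACTOR Future<Void> startBlobRestoreExample(Database db) {
	bool started = wait(db->blobRestore(normalKeys));
	if (!started) {
		printf("A restore is already in progress for this range\n");
	}
	return Void();
}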
@@ -296,7 +296,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( DD_STORAGE_WIGGLE_PAUSE_THRESHOLD, 10 ); if( randomize && BUGGIFY ) DD_STORAGE_WIGGLE_PAUSE_THRESHOLD = 1000;
 	init( DD_STORAGE_WIGGLE_STUCK_THRESHOLD, 20 );
 	init( DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC, isSimulated ? 2 : 21 * 60 * 60 * 24 ); if(randomize && BUGGIFY) DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC = isSimulated ? 0: 120;
-	init( DD_TENANT_AWARENESS_ENABLED, false ); if(isSimulated) DD_TENANT_AWARENESS_ENABLED = deterministicRandom()->coinflip();
+	init( DD_TENANT_AWARENESS_ENABLED, false );
+	init( STORAGE_QUOTA_ENABLED, false ); if(isSimulated) STORAGE_QUOTA_ENABLED = deterministicRandom()->coinflip();
 	init( TENANT_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
 	init( TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
 	init( TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL, 10 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);

@@ -387,7 +388,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( ROCKSDB_BACKGROUND_PARALLELISM, 4 );
 	init( ROCKSDB_READ_PARALLELISM, 4 );
 	// If true, do not process and store RocksDB logs
-	init( ROCKSDB_MUTE_LOGS, false );
+	init( ROCKSDB_MUTE_LOGS, true );
 	// Use a smaller memtable in simulation to avoid OOMs.
 	int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024;
 	init( ROCKSDB_MEMTABLE_BYTES, memtableBytes );

@@ -809,18 +810,24 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( RANGESTREAM_LIMIT_BYTES, 2e6 ); if( randomize && BUGGIFY ) RANGESTREAM_LIMIT_BYTES = 1;
 	init( CHANGEFEEDSTREAM_LIMIT_BYTES, 1e6 ); if( randomize && BUGGIFY ) CHANGEFEEDSTREAM_LIMIT_BYTES = 1;
 	init( BLOBWORKERSTATUSSTREAM_LIMIT_BYTES, 1e4 ); if( randomize && BUGGIFY ) BLOBWORKERSTATUSSTREAM_LIMIT_BYTES = 1;
-	init( ENABLE_CLEAR_RANGE_EAGER_READS, true ); if( randomize && BUGGIFY ) ENABLE_CLEAR_RANGE_EAGER_READS = deterministicRandom()->coinflip() ? false : true;
+	init( ENABLE_CLEAR_RANGE_EAGER_READS, true ); if( randomize && BUGGIFY ) ENABLE_CLEAR_RANGE_EAGER_READS = deterministicRandom()->coinflip();
 	init( CHECKPOINT_TRANSFER_BLOCK_BYTES, 40e6 );
 	init( QUICK_GET_VALUE_FALLBACK, true );
 	init( QUICK_GET_KEY_VALUES_FALLBACK, true );
-	init( MAX_PARALLEL_QUICK_GET_VALUE, 50 ); if ( randomize && BUGGIFY ) MAX_PARALLEL_QUICK_GET_VALUE = deterministicRandom()->randomInt(1, 100);
+	init( STRICTLY_ENFORCE_BYTE_LIMIT, false); if( randomize && BUGGIFY ) STRICTLY_ENFORCE_BYTE_LIMIT = deterministicRandom()->coinflip();
+	init( FRACTION_INDEX_BYTELIMIT_PREFETCH, 0.2); if( randomize && BUGGIFY ) FRACTION_INDEX_BYTELIMIT_PREFETCH = 0.01 + deterministicRandom()->random01();
+	init( MAX_PARALLEL_QUICK_GET_VALUE, 10 ); if ( randomize && BUGGIFY ) MAX_PARALLEL_QUICK_GET_VALUE = deterministicRandom()->randomInt(1, 100);
 	init( QUICK_GET_KEY_VALUES_LIMIT, 2000 );
 	init( QUICK_GET_KEY_VALUES_LIMIT_BYTES, 1e7 );
 	init( STORAGE_FEED_QUERY_HARD_LIMIT, 100000 );
+	// Read priority definitions in the form of a list of their relative concurrency share weights
+	init( STORAGESERVER_READ_PRIORITIES, "120,10,20,40,60" );
+	// The total concurrency which will be shared by active priorities according to their relative weights
 	init( STORAGE_SERVER_READ_CONCURRENCY, 70 );
-	// Priorities which each ReadType maps to, in enumeration order
-	init( STORAGESERVER_READ_RANKS, "0,2,1,1,1" );
-	init( STORAGESERVER_READ_PRIORITIES, "48,32,8" );
+	// The priority number which each ReadType maps to in enumeration order
+	// This exists for flexibility but assigning each ReadType to its own unique priority number makes the most sense
+	// The enumeration is currently: eager, fetch, low, normal, high
+	init( STORAGESERVER_READTYPE_PRIORITY_MAP, "0,1,2,3,4" );
 
 	//Wait Failure
 	init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;

@@ -944,7 +951,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 );
 	init( REDWOOD_EVICT_UPDATED_PAGES, true ); if( randomize && BUGGIFY ) { REDWOOD_EVICT_UPDATED_PAGES = false; }
 	init( REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT, 2 ); if( randomize && BUGGIFY ) { REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT = deterministicRandom()->randomInt(1, 7); }
-	init( REDWOOD_PRIORITY_LAUNCHS, "32,32,32,32" );
+	init( REDWOOD_IO_PRIORITIES, "32,32,32,32" );
 	init( REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT, false );
 
 	// Server request latency measurement
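The new comma-separated priority knobs above (for example STORAGESERVER_READ_PRIORITIES = "120,10,20,40,60") are just weight lists encoded as strings. The small standalone sketch below shows one way such a string can be split into integer weights; the helper name is an assumption and is not the server's own parser.

// Illustrative parser for a comma-separated weight knob string.
#include <sstream>
#include <string>
#include <vector>

std::vector<int> parseWeights(const std::string& knob) {
	std::vector<int> weights;
	std::stringstream ss(knob);
	std::string token;
	while (std::getline(ss, token, ',')) {
		weights.push_back(std::stoi(token)); // each token is one relative share weight
	}
	return weights;
}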
@@ -1660,11 +1660,41 @@ BlobWorkerInterface decodeBlobWorkerListValue(ValueRef const& value) {
 	return interf;
 }
 
+const KeyRangeRef blobRestoreCommandKeys("\xff\x02/blobRestoreCommand/"_sr, "\xff\x02/blobRestoreCommand0"_sr);
+
+const Value blobRestoreCommandKeyFor(const KeyRangeRef range) {
+	BinaryWriter wr(AssumeVersion(ProtocolVersion::withBlobGranule()));
+	wr.serializeBytes(blobRestoreCommandKeys.begin);
+	wr << range;
+	return wr.toValue();
+}
+
+const KeyRange decodeBlobRestoreCommandKeyFor(const KeyRef key) {
+	KeyRange range;
+	BinaryReader reader(key.removePrefix(blobRestoreCommandKeys.begin),
+	                    AssumeVersion(ProtocolVersion::withBlobGranule()));
+	reader >> range;
+	return range;
+}
+
+const Value blobRestoreCommandValueFor(BlobRestoreStatus status) {
+	BinaryWriter wr(IncludeVersion(ProtocolVersion::withBlobGranule()));
+	wr << status;
+	return wr.toValue();
+}
+
+Standalone<BlobRestoreStatus> decodeBlobRestoreStatus(ValueRef const& value) {
+	Standalone<BlobRestoreStatus> status;
+	BinaryReader reader(value, IncludeVersion());
+	reader >> status;
+	return status;
+}
+
 const KeyRangeRef storageQuotaKeys("\xff/storageQuota/"_sr, "\xff/storageQuota0"_sr);
 const KeyRef storageQuotaPrefix = storageQuotaKeys.begin;
 
-Key storageQuotaKey(StringRef tenantName) {
-	return tenantName.withPrefix(storageQuotaPrefix);
+Key storageQuotaKey(StringRef tenantGroupName) {
+	return tenantGroupName.withPrefix(storageQuotaPrefix);
 }
 
 const KeyRangeRef idempotencyIdKeys("\xff\x02/idmp/"_sr, "\xff\x02/idmp0"_sr);
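A hedged round-trip sketch for the key and value codecs introduced above; the sample range and progress value are arbitrary, and the wrapper function is not part of the change.

// Illustrative round trip: encode a restore command key/value, then decode it back.
void blobRestoreCodecExample() {
	KeyRange range = KeyRangeRef("a"_sr, "b"_sr);
	Key key = blobRestoreCommandKeyFor(range);
	ASSERT(decodeBlobRestoreCommandKeyFor(key) == range);

	BlobRestoreStatus status;
	status.progress = 42;
	Value value = blobRestoreCommandValueFor(status);
	ASSERT(decodeBlobRestoreStatus(value).progress == 42);
}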
@@ -18,6 +18,7 @@
  * limitations under the License.
  */
 
+#include "fdbrpc/Msgpack.h"
 #include "fdbclient/Tracing.h"
 #include "flow/IRandom.h"
 #include "flow/UnitTest.h"

@@ -79,41 +80,6 @@ struct LogfileTracer : ITracer {
 	}
 };
 
-struct TraceRequest {
-	std::unique_ptr<uint8_t[]> buffer;
-	// Amount of data in buffer (bytes).
-	std::size_t data_size;
-	// Size of buffer (bytes).
-	std::size_t buffer_size;
-
-	void write_byte(uint8_t byte) { write_bytes(&byte, 1); }
-
-	void write_bytes(const uint8_t* buf, std::size_t n) {
-		resize(n);
-		std::copy(buf, buf + n, buffer.get() + data_size);
-		data_size += n;
-	}
-
-	void resize(std::size_t n) {
-		if (data_size + n <= buffer_size) {
-			return;
-		}
-
-		std::size_t size = buffer_size;
-		while (size < data_size + n) {
-			size *= 2;
-		}
-
-		TraceEvent(SevInfo, "TracingSpanResizedBuffer").detail("OldSize", buffer_size).detail("NewSize", size);
-		auto new_buffer = std::make_unique<uint8_t[]>(size);
-		std::copy(buffer.get(), buffer.get() + data_size, new_buffer.get());
-		buffer = std::move(new_buffer);
-		buffer_size = size;
-	}
-
-	void reset() { data_size = 0; }
-};
-
 // A server listening for UDP trace messages, run only in simulation.
 ACTOR Future<Void> simulationStartServer() {
 	// We're going to force the address to be loopback regardless of FLOW_KNOBS->TRACING_UDP_LISTENER_ADDR

@@ -167,146 +133,89 @@ ACTOR Future<Void> traceLog(int* pendingMessages, bool* sendError) {
 struct UDPTracer : public ITracer {
 	// Serializes span fields as an array into the supplied TraceRequest
 	// buffer.
-	void serialize_span(const Span& span, TraceRequest& request) {
+	void serialize_span(const Span& span, MsgpackBuffer& buf) {
 		uint16_t size = 12;
-		request.write_byte(size | 0b10010000); // write as array
-		serialize_value(span.context.traceID.first(), request, 0xcf); // trace id
-		serialize_value(span.context.traceID.second(), request, 0xcf); // trace id
-		serialize_value(span.context.spanID, request, 0xcf); // spanid
+		buf.write_byte(size | 0b10010000); // write as array
+		serialize_value(span.context.traceID.first(), buf, 0xcf); // trace id
+		serialize_value(span.context.traceID.second(), buf, 0xcf); // trace id
+		serialize_value(span.context.spanID, buf, 0xcf); // spanid
 		// parent span id
-		serialize_value(span.parentContext.spanID, request, 0xcf); // spanId
+		serialize_value(span.parentContext.spanID, buf, 0xcf); // spanId
 		// Payload
-		serialize_string(span.location.name.toString(), request);
-		serialize_value(span.begin, request, 0xcb); // start time
-		serialize_value(span.end, request, 0xcb); // end
+		serialize_string(span.location.name.toString(), buf);
+		serialize_value(span.begin, buf, 0xcb); // start time
+		serialize_value(span.end, buf, 0xcb); // end
 		// Kind
-		serialize_value(span.kind, request, 0xcc);
+		serialize_value(span.kind, buf, 0xcc);
 		// Status
-		serialize_value(span.status, request, 0xcc);
+		serialize_value(span.status, buf, 0xcc);
 		// Links
-		serialize_vector(span.links, request);
+		serialize_vector(span.links, buf);
 		// Events
-		serialize_vector(span.events, request);
+		serialize_vector(span.events, buf);
 		// Attributes
-		serialize_map(span.attributes, request);
+		serialize_map(span.attributes, buf);
 	}
 
 private:
-	// Writes the given value in big-endian format to the request. Sets the
-	// first byte to msgpack_type.
-	template <typename T>
-	inline void serialize_value(const T& val, TraceRequest& request, uint8_t msgpack_type) {
-		request.write_byte(msgpack_type);
-
-		const uint8_t* p = reinterpret_cast<const uint8_t*>(std::addressof(val));
-		for (size_t i = 0; i < sizeof(T); ++i) {
-			request.write_byte(p[sizeof(T) - i - 1]);
-		}
-	}
-
-	// Writes the given string to the request as a sequence of bytes. Inserts a
-	// format byte at the beginning of the string according to the its length,
-	// as specified by the msgpack specification.
-	inline void serialize_string(const uint8_t* c, int length, TraceRequest& request) {
-		if (length <= 31) {
-			// A size 0 string is ok. We still need to write a byte
-			// identifiying the item as a string, but can set the size to 0.
-			request.write_byte(static_cast<uint8_t>(length) | 0b10100000);
-		} else if (length <= 255) {
-			request.write_byte(0xd9);
-			request.write_byte(static_cast<uint8_t>(length));
-		} else if (length <= 65535) {
-			request.write_byte(0xda);
-			request.write_byte(reinterpret_cast<const uint8_t*>(&length)[1]);
-			request.write_byte(reinterpret_cast<const uint8_t*>(&length)[0]);
-		} else {
-			TraceEvent(SevWarn, "TracingSpanSerializeString")
-			    .detail("Failed to MessagePack encode very large string", length);
-			ASSERT_WE_THINK(false);
-		}
-
-		request.write_bytes(c, length);
-	}
-
-	inline void serialize_string(const std::string& str, TraceRequest& request) {
-		serialize_string(reinterpret_cast<const uint8_t*>(str.data()), str.size(), request);
-	}
-
 	// Writes the given vector of linked SpanContext's to the request. If the vector is
 	// empty, the request is not modified.
-	inline void serialize_vector(const SmallVectorRef<SpanContext>& vec, TraceRequest& request) {
+	inline void serialize_vector(const SmallVectorRef<SpanContext>& vec, MsgpackBuffer& buf) {
 		int size = vec.size();
 		if (size <= 15) {
-			request.write_byte(static_cast<uint8_t>(size) | 0b10010000);
+			buf.write_byte(static_cast<uint8_t>(size) | 0b10010000);
 		} else if (size <= 65535) {
-			request.write_byte(0xdc);
-			request.write_byte(reinterpret_cast<const uint8_t*>(&size)[1]);
-			request.write_byte(reinterpret_cast<const uint8_t*>(&size)[0]);
+			buf.write_byte(0xdc);
+			buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[1]);
+			buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[0]);
 		} else {
 			TraceEvent(SevWarn, "TracingSpanSerializeVector").detail("Failed to MessagePack encode large vector", size);
 			ASSERT_WE_THINK(false);
 		}
 
 		for (const auto& link : vec) {
-			serialize_value(link.traceID.first(), request, 0xcf); // trace id
-			serialize_value(link.traceID.second(), request, 0xcf); // trace id
-			serialize_value(link.spanID, request, 0xcf); // spanid
+			serialize_value(link.traceID.first(), buf, 0xcf); // trace id
+			serialize_value(link.traceID.second(), buf, 0xcf); // trace id
+			serialize_value(link.spanID, buf, 0xcf); // spanid
 		}
 	}
 
-	// Writes the given vector of linked SpanContext's to the request. If the vector is
+	// Writes the given vector of linked SpanEventRef's to the request. If the vector is
 	// empty, the request is not modified.
-	inline void serialize_vector(const SmallVectorRef<SpanEventRef>& vec, TraceRequest& request) {
+	inline void serialize_vector(const SmallVectorRef<SpanEventRef>& vec, MsgpackBuffer& buf) {
 		int size = vec.size();
 		if (size <= 15) {
-			request.write_byte(static_cast<uint8_t>(size) | 0b10010000);
+			buf.write_byte(static_cast<uint8_t>(size) | 0b10010000);
 		} else if (size <= 65535) {
-			request.write_byte(0xdc);
-			request.write_byte(reinterpret_cast<const uint8_t*>(&size)[1]);
-			request.write_byte(reinterpret_cast<const uint8_t*>(&size)[0]);
+			buf.write_byte(0xdc);
+			buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[1]);
+			buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[0]);
 		} else {
 			TraceEvent(SevWarn, "TracingSpanSerializeVector").detail("Failed to MessagePack encode large vector", size);
 			ASSERT_WE_THINK(false);
 		}
 
 		for (const auto& event : vec) {
-			serialize_string(event.name.toString(), request); // event name
-			serialize_value(event.time, request, 0xcb); // event time
-			serialize_vector(event.attributes, request);
+			serialize_string(event.name.toString(), buf); // event name
+			serialize_value(event.time, buf, 0xcb); // event time
+			serialize_vector(event.attributes, buf);
 		}
 	}
 
-	inline void serialize_vector(const SmallVectorRef<KeyValueRef>& vals, TraceRequest& request) {
+	inline void serialize_vector(const SmallVectorRef<KeyValueRef>& vals, MsgpackBuffer& buf) {
 		int size = vals.size();
 		if (size <= 15) {
 			// N.B. We're actually writing this out as a fixmap here in messagepack format!
 			// fixmap 1000xxxx 0x80 - 0x8f
-			request.write_byte(static_cast<uint8_t>(size) | 0b10000000);
+			buf.write_byte(static_cast<uint8_t>(size) | 0b10000000);
 		} else {
 			TraceEvent(SevWarn, "TracingSpanSerializeVector").detail("Failed to MessagePack encode large vector", size);
 			ASSERT_WE_THINK(false);
 		}
 
 		for (const auto& kv : vals) {
-			serialize_string(kv.key.toString(), request);
-			serialize_string(kv.value.toString(), request);
-		}
-	}
-
-	template <class Map>
-	inline void serialize_map(const Map& map, TraceRequest& request) {
-		int size = map.size();
-
-		if (size <= 15) {
-			request.write_byte(static_cast<uint8_t>(size) | 0b10000000);
-		} else {
-			TraceEvent(SevWarn, "TracingSpanSerializeMap").detail("Failed to MessagePack encode large map", size);
-			ASSERT_WE_THINK(false);
-		}
-
-		for (const auto& [key, value] : map) {
-			serialize_string(key.begin(), key.size(), request);
-			serialize_string(value.begin(), value.size(), request);
+			serialize_string(kv.key.toString(), buf);
+			serialize_string(kv.value.toString(), buf);
 		}
 	}
 };

@@ -336,9 +245,9 @@ ACTOR Future<Void> fastTraceLogger(int* unreadyMessages, int* failedMessages, in
 struct FastUDPTracer : public UDPTracer {
 	FastUDPTracer()
 	  : unready_socket_messages_(0), failed_messages_(0), total_messages_(0), socket_fd_(-1), send_error_(false) {
-		request_ = TraceRequest{ .buffer = std::make_unique<uint8_t[]>(kTraceBufferSize),
+		request_ = MsgpackBuffer{ .buffer = std::make_unique<uint8_t[]>(kTraceBufferSize),
		                          .data_size = 0,
		                          .buffer_size = kTraceBufferSize };
 	}
 
 	TracerType type() const override { return TracerType::NETWORK_LOSSY; }

@@ -394,7 +303,7 @@ struct FastUDPTracer : public UDPTracer {
 	}
 
 private:
-	TraceRequest request_;
+	MsgpackBuffer request_;
 
 	int unready_socket_messages_;
 	int failed_messages_;

@@ -657,9 +566,9 @@ TEST_CASE("/flow/Tracing/FastUDPMessagePackEncoding") {
 	IKnobCollection::getMutableGlobalKnobCollection().setKnob("tracing_span_attributes_enabled",
 	                                                          KnobValueRef::create(bool{ true }));
 	Span span1("encoded_span"_loc);
-	auto request = TraceRequest{ .buffer = std::make_unique<uint8_t[]>(kTraceBufferSize),
+	auto request = MsgpackBuffer{ .buffer = std::make_unique<uint8_t[]>(kTraceBufferSize),
	                              .data_size = 0,
	                              .buffer_size = kTraceBufferSize };
 	auto tracer = FastUDPTracer();
 	tracer.serialize_span(span1, request);
 	auto data = request.buffer.get();
@@ -313,4 +313,15 @@ struct BlobManifest {
 	}
 };
 
+// Defines blob restore status
+struct BlobRestoreStatus {
+	constexpr static FileIdentifier file_identifier = 378657;
+	int progress;
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, progress);
+	}
+};
+
 #endif
@@ -56,4 +56,7 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
 
 std::string randomBGFilename(UID blobWorkerID, UID granuleID, Version version, std::string suffix);
 
+// For benchmark testing only. It should never be called in prod.
+void sortDeltasByKey(const Standalone<GranuleDeltas>& deltasByVersion, const KeyRangeRef& fileRange);
+
 #endif
@@ -403,6 +403,7 @@ public:
 	Future<Version> verifyBlobRange(const KeyRange& range,
 	                                Optional<Version> version,
 	                                Optional<TenantName> tenantName = {});
+	Future<bool> blobRestore(const KeyRange range);
 
 	// private:
 	explicit DatabaseContext(Reference<AsyncVar<Reference<IClusterConnectionRecord>>> connectionRecord,
@@ -163,9 +163,10 @@ bool schemaMatch(json_spirit::mValue const& schema,
 // storage nodes
 ACTOR Future<Void> mgmtSnapCreate(Database cx, Standalone<StringRef> snapCmd, UID snapUID);
 
-// Set and get the storage quota per tenant
-void setStorageQuota(Transaction& tr, StringRef tenantName, int64_t quota);
-ACTOR Future<Optional<int64_t>> getStorageQuota(Transaction* tr, StringRef tenantName);
+// Set/clear/get the storage quota for the given tenant group
+void setStorageQuota(Transaction& tr, StringRef tenantGroupName, int64_t quota);
+void clearStorageQuota(Transaction& tr, StringRef tenantGroupName);
+ACTOR Future<Optional<int64_t>> getStorageQuota(Transaction* tr, StringRef tenantGroupName);
 
 #include "flow/unactorcompiler.h"
 #endif
@@ -237,6 +237,8 @@ public:
 	int64_t
 	    DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC; // Minimal age of a correct-configured server before it's chosen to be wiggled
 	bool DD_TENANT_AWARENESS_ENABLED;
+	bool STORAGE_QUOTA_ENABLED; // Whether storage quota enforcement for tenant groups and all the relevant storage
+	                            // usage / quota monitors are enabled.
 	int TENANT_CACHE_LIST_REFRESH_INTERVAL; // How often the TenantCache is refreshed
 	int TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL; // How often the storage bytes used by each tenant is refreshed
 	                                                 // in the TenantCache

@@ -761,14 +763,16 @@ public:
 	bool ENABLE_CLEAR_RANGE_EAGER_READS;
 	bool QUICK_GET_VALUE_FALLBACK;
 	bool QUICK_GET_KEY_VALUES_FALLBACK;
+	bool STRICTLY_ENFORCE_BYTE_LIMIT;
+	double FRACTION_INDEX_BYTELIMIT_PREFETCH;
 	int MAX_PARALLEL_QUICK_GET_VALUE;
 	int CHECKPOINT_TRANSFER_BLOCK_BYTES;
 	int QUICK_GET_KEY_VALUES_LIMIT;
 	int QUICK_GET_KEY_VALUES_LIMIT_BYTES;
 	int STORAGE_FEED_QUERY_HARD_LIMIT;
-	int STORAGE_SERVER_READ_CONCURRENCY;
-	std::string STORAGESERVER_READ_RANKS;
 	std::string STORAGESERVER_READ_PRIORITIES;
+	int STORAGE_SERVER_READ_CONCURRENCY;
+	std::string STORAGESERVER_READTYPE_PRIORITY_MAP;
 
 	// Wait Failure
 	int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;

@@ -917,7 +921,7 @@ public:
 	int REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT; // Minimum height for which to keep and reuse page decode caches
 	bool REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT; // Whether to split pages by tenant if encryption is enabled
 
-	std::string REDWOOD_PRIORITY_LAUNCHS;
+	std::string REDWOOD_IO_PRIORITIES;
 
 	// Server request latency measurement
 	int LATENCY_SAMPLE_SIZE;
@@ -710,11 +710,18 @@ UID decodeBlobWorkerListKey(KeyRef const& key);
 const Value blobWorkerListValue(BlobWorkerInterface const& interface);
 BlobWorkerInterface decodeBlobWorkerListValue(ValueRef const& value);
 
+// Blob restore command
+extern const KeyRangeRef blobRestoreCommandKeys;
+const Value blobRestoreCommandKeyFor(const KeyRangeRef range);
+const KeyRange decodeBlobRestoreCommandKeyFor(const KeyRef key);
+const Value blobRestoreCommandValueFor(BlobRestoreStatus status);
+Standalone<BlobRestoreStatus> decodeBlobRestoreStatus(ValueRef const& value);
+
 // Storage quota per tenant
-// "\xff/storageQuota/[[tenantName]]" := "[[quota]]"
+// "\xff/storageQuota/[[tenantGroupName]]" := "[[quota]]"
 extern const KeyRangeRef storageQuotaKeys;
 extern const KeyRef storageQuotaPrefix;
-Key storageQuotaKey(StringRef tenantName);
+Key storageQuotaKey(StringRef tenantGroupName);
 
 extern const KeyRangeRef idempotencyIdKeys;
 extern const KeyRef idempotencyIdsExpiredVersion;
@@ -0,0 +1,157 @@
+/*
+ * Msgpack.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef FDBRPC_MSGPACK_H
+#define FDBRPC_MSGPACK_H
+#include <limits>
+#pragma once
+
+#include <memory>
+#include <algorithm>
+#include "flow/Trace.h"
+#include "flow/Error.h"
+#include "flow/network.h"
+
+struct MsgpackBuffer {
+	std::unique_ptr<uint8_t[]> buffer;
+	// Amount of data in buffer (bytes).
+	std::size_t data_size;
+	// Size of buffer (bytes).
+	std::size_t buffer_size;
+
+	void write_byte(uint8_t byte) { write_bytes(&byte, 1); }
+
+	// This assumes that pos <= data_size
+	void edit_byte(uint8_t byte, size_t pos) { buffer[pos] = byte; }
+
+	void write_bytes(const uint8_t* buf, std::size_t n) {
+		resize(n);
+		std::copy(buf, buf + n, buffer.get() + data_size);
+		data_size += n;
+	}
+
+	void resize(std::size_t n) {
+		if (data_size + n <= buffer_size) {
+			return;
+		}
+
+		std::size_t size = buffer_size;
+		while (size < data_size + n) {
+			size *= 2;
+		}
+
+		TraceEvent(SevInfo, "MsgpackResizedBuffer").detail("OldSize", buffer_size).detail("NewSize", size);
+		auto new_buffer = std::make_unique<uint8_t[]>(size);
+		std::copy(buffer.get(), buffer.get() + data_size, new_buffer.get());
+		buffer = std::move(new_buffer);
+		buffer_size = size;
+	}
+
+	void reset() { data_size = 0; }
+};
+
+inline void serialize_bool(bool val, MsgpackBuffer& buf) {
+	if (val) {
+		buf.write_byte(0xc3);
+	} else {
+		buf.write_byte(0xc2);
+	}
+}
+
+// Writes the given value in big-endian format to the request. Sets the
+// first byte to msgpack_type.
+template <typename T>
+inline void serialize_value(const T& val, MsgpackBuffer& buf, uint8_t msgpack_type) {
+	buf.write_byte(msgpack_type);
+
+	const uint8_t* p = reinterpret_cast<const uint8_t*>(std::addressof(val));
+	for (size_t i = 0; i < sizeof(T); ++i) {
+		buf.write_byte(p[sizeof(T) - i - 1]);
+	}
+}
+
+// Writes the given string to the request as a sequence of bytes. Inserts a
+// format byte at the beginning of the string according to the its length,
+// as specified by the msgpack specification.
+inline void serialize_string(const uint8_t* c, int length, MsgpackBuffer& buf) {
+	if (length <= 31) {
+		// A size 0 string is ok. We still need to write a byte
+		// identifiying the item as a string, but can set the size to 0.
+		buf.write_byte(static_cast<uint8_t>(length) | 0b10100000);
+	} else if (length <= 255) {
+		buf.write_byte(0xd9);
+		buf.write_byte(static_cast<uint8_t>(length));
+	} else if (length <= 65535) {
+		buf.write_byte(0xda);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&length)[1]);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&length)[0]);
+	} else {
+		TraceEvent(SevWarn, "MsgpackSerializeString").detail("Failed to MessagePack encode very large string", length);
+		ASSERT_WE_THINK(false);
+	}
+
+	buf.write_bytes(c, length);
+}
+
+inline void serialize_string(const std::string& str, MsgpackBuffer& buf) {
+	serialize_string(reinterpret_cast<const uint8_t*>(str.data()), str.size(), buf);
+}
+
+template <typename T, typename F>
+inline void serialize_vector(const std::vector<T>& vec, MsgpackBuffer& buf, F f) {
+	size_t size = vec.size();
+	if (size <= 15) {
+		buf.write_byte(static_cast<uint8_t>(size) | 0b10010000);
+	} else if (size <= 65535) {
+		buf.write_byte(0xdc);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[1]);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[0]);
+	} else if (size <= std::numeric_limits<uint32_t>::max()) {
+		buf.write_byte(0xdd);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[3]);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[2]);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[1]);
+		buf.write_byte(reinterpret_cast<const uint8_t*>(&size)[0]);
+	} else {
+		TraceEvent(SevWarn, "MsgPackSerializeVector").detail("Failed to MessagePack encode large vector", size);
+		ASSERT_WE_THINK(false);
+	}
+	// Use the provided serializer function to serialize the individual types of the vector
+	for (const auto& val : vec) {
+		f(val, buf);
+	}
+}
+
+template <class Map>
+inline void serialize_map(const Map& map, MsgpackBuffer& buf) {
+	int size = map.size();
+
+	if (size <= 15) {
+		buf.write_byte(static_cast<uint8_t>(size) | 0b10000000);
+	} else {
+		TraceEvent(SevWarn, "MsgPackSerializeMap").detail("Failed to MessagePack encode large map", size);
+		ASSERT_WE_THINK(false);
+	}
+
+	for (const auto& [key, value] : map) {
+		serialize_string(key.begin(), key.size(), buf);
+		serialize_string(value.begin(), value.size(), buf);
+	}
+}
+#endif
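A small usage sketch of the new MsgpackBuffer helpers above; the buffer size and the encoded fields are arbitrary, and the wrapper function is illustrative rather than part of the change.

// Illustrative: build a tiny msgpack payload with the free serializers.
#include "fdbrpc/Msgpack.h"

void msgpackExample() {
	MsgpackBuffer buf{ .buffer = std::make_unique<uint8_t[]>(64), .data_size = 0, .buffer_size = 64 };
	serialize_bool(true, buf); // 0xc3
	serialize_value(uint64_t(123), buf, 0xcf); // uint64 marker followed by 8 big-endian bytes
	serialize_string(std::string("hello"), buf); // fixstr header plus the raw bytes
	// buf.buffer now holds buf.data_size bytes of msgpack data; reset() reuses the allocation.
	buf.reset();
}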
@@ -20,6 +20,7 @@
 
 #ifndef FDBRPC_TIMED_REQUEST_H
 #define FDBRPC_TIMED_REQUEST_H
+#include "flow/network.h"
 #pragma once
 
 #include <fdbrpc/fdbrpc.h>

@@ -35,7 +36,7 @@ public:
 
 	TimedRequest() {
 		if (!FlowTransport::isClient()) {
-			_requestTime = timer();
+			_requestTime = g_network->timer();
 		} else {
 			_requestTime = 0.0;
 		}
@@ -388,6 +388,8 @@ struct BlobManagerData : NonCopyable, ReferenceCounted<BlobManagerData> {
 
 	Promise<Void> iAmReplaced;
 
+	bool isFullRestoreMode = false;
+
 	BlobManagerData(UID id,
 	                Reference<AsyncVar<ServerDBInfo> const> dbInfo,
 	                Database db,

@@ -3537,7 +3539,10 @@ ACTOR Future<Void> recoverBlobManager(Reference<BlobManagerData> bmData) {
 	bmData->startRecruiting.trigger();
 
 	bmData->initBStore();
-	if (isFullRestoreMode()) {
+
+	bool isFullRestore = wait(isFullRestoreMode(bmData->db, normalKeys));
+	bmData->isFullRestoreMode = isFullRestore;
+	if (bmData->isFullRestoreMode) {
 		wait(loadManifest(bmData->db, bmData->bstore));
 
 		int64_t epoc = wait(lastBlobEpoc(bmData->db, bmData->bstore));

@@ -5297,11 +5302,8 @@ ACTOR Future<Void> backupManifest(Reference<BlobManagerData> bmData) {
 
 	bmData->initBStore();
 	loop {
-		bool pendingSplit = wait(hasPendingSplit(bmData));
-		if (!pendingSplit) {
-			wait(dumpManifest(bmData->db, bmData->bstore, bmData->epoch, bmData->manifestDumperSeqNo));
-			bmData->manifestDumperSeqNo++;
-		}
+		wait(dumpManifest(bmData->db, bmData->bstore, bmData->epoch, bmData->manifestDumperSeqNo));
+		bmData->manifestDumperSeqNo++;
 		wait(delay(SERVER_KNOBS->BLOB_MANIFEST_BACKUP_INTERVAL));
 	}
 }

@@ -5370,7 +5372,7 @@ ACTOR Future<Void> blobManager(BlobManagerInterface bmInterf,
 	if (SERVER_KNOBS->BG_ENABLE_MERGING) {
 		self->addActor.send(granuleMergeChecker(self));
 	}
-	if (SERVER_KNOBS->BLOB_MANIFEST_BACKUP && !isFullRestoreMode()) {
+	if (SERVER_KNOBS->BLOB_MANIFEST_BACKUP && !self->isFullRestoreMode) {
 		self->addActor.send(backupManifest(self));
 	}
 
@@ -60,7 +60,7 @@ struct BlobManifestFile {
 	int64_t seqNo{ 0 };
 
 	BlobManifestFile(const std::string& path) {
-		if (sscanf(path.c_str(), MANIFEST_FOLDER "/manifest.%" SCNd64 ".%" SCNd64, &epoch, &seqNo) == 2) {
+		if (sscanf(path.c_str(), MANIFEST_FOLDER "/" MANIFEST_FOLDER ".%" SCNd64 ".%" SCNd64, &epoch, &seqNo) == 2) {
 			fileName = path;
 		}
 	}

@@ -76,7 +76,7 @@ struct BlobManifestFile {
 			BlobManifestFile file(path);
 			return file.epoch > 0 && file.seqNo > 0;
 		};
-		BackupContainerFileSystem::FilesAndSizesT filesAndSizes = wait(reader->listFiles(MANIFEST_FOLDER, filter));
+		BackupContainerFileSystem::FilesAndSizesT filesAndSizes = wait(reader->listFiles(MANIFEST_FOLDER "/", filter));
 
 		std::vector<BlobManifestFile> result;
 		for (auto& f : filesAndSizes) {

@@ -107,6 +107,9 @@ public:
 		try {
 			state Standalone<BlobManifest> manifest;
 			Standalone<VectorRef<KeyValueRef>> rows = wait(getSystemKeys(self));
+			if (rows.size() == 0) {
+				return Void();
+			}
 			manifest.rows = rows;
 			Value data = encode(manifest);
 			wait(writeToFile(self, data));

@@ -153,7 +156,8 @@ private:
 		state std::string fullPath;
 
 		std::tie(writer, fullPath) = self->blobConn_->createForWrite(MANIFEST_FOLDER);
-		state std::string fileName = format(MANIFEST_FOLDER "/manifest.%lld.%lld", self->epoch_, self->seqNo_);
+		state std::string fileName =
+		    format(MANIFEST_FOLDER "/" MANIFEST_FOLDER ".%lld.%lld", self->epoch_, self->seqNo_);
 		state Reference<IBackupFile> file = wait(writer->writeFile(fileName));
 		wait(file->append(data.begin(), data.size()));
 		wait(file->finish());

@@ -453,3 +457,26 @@ ACTOR Future<int64_t> lastBlobEpoc(Database db, Reference<BlobConnectionProvider
 	int64_t epoc = wait(BlobManifestLoader::lastBlobEpoc(loader));
 	return epoc;
 }
+
+// Return true if the given key range is restoring
+ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef keys) {
+	state Transaction tr(db);
+	loop {
+		tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+		tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+		tr.setOption(FDBTransactionOptions::LOCK_AWARE);
+		try {
+			RangeResult ranges = wait(tr.getRange(blobRestoreCommandKeys, CLIENT_KNOBS->TOO_MANY));
+			for (auto& r : ranges) {
+				KeyRange keyRange = decodeBlobRestoreCommandKeyFor(r.key);
+				if (keyRange.contains(keys)) {
+					Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(r.value);
+					return status.progress < 100; // progress is less than 100
+				}
+			}
+			return false;
+		} catch (Error& e) {
+			wait(tr.onError(e));
+		}
+	}
+}
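A hedged sketch (not part of the change) of how a role could consult the isFullRestoreMode() helper added above before deciding whether to back up or restore; the actor wrapper, trace event name, and use of normalKeys are assumptions.

// Illustrative caller: log when a full restore is in progress for the whole key space.
ACTOR Future<Void> checkRestoreModeExample(Database db) {
	bool restoring = wait(isFullRestoreMode(db, normalKeys));
	if (restoring) {
		TraceEvent("FullRestoreInProgress").log();
	}
	return Void();
}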
@@ -21,6 +21,7 @@
 #include "flow/ActorCollection.h"
 #include "flow/FastRef.h"
 #include "flow/IRandom.h"
+#include "flow/Trace.h"
 #include "flow/flow.h"
 #include "fdbclient/StorageServerInterface.h"
 #include "fdbclient/BlobConnectionProvider.h"

@@ -63,14 +64,7 @@ public:
 
 	// Start migration
 	ACTOR static Future<Void> start(Reference<BlobMigrator> self) {
-		if (!isFullRestoreMode()) {
-			return Void();
-		}
-		wait(delay(10)); // TODO need to wait for a signal for readiness of blob manager
-
-		BlobGranuleRestoreVersionVector granules = wait(listBlobGranules(self->db_, self->blobConn_));
-		self->blobGranules_ = granules;
-
+		wait(checkIfReadyForMigration(self));
 		wait(prepare(self, normalKeys));
 		wait(advanceVersion(self));
 		wait(serverLoop(self));

@@ -78,6 +72,28 @@ public:
 	}
 
 private:
+	// Check if blob manifest is loaded so that blob migration can start
+	ACTOR static Future<Void> checkIfReadyForMigration(Reference<BlobMigrator> self) {
+		loop {
+			bool isFullRestore = wait(isFullRestoreMode(self->db_, normalKeys));
+			if (isFullRestore) {
+				BlobGranuleRestoreVersionVector granules = wait(listBlobGranules(self->db_, self->blobConn_));
+				if (!granules.empty()) {
+					self->blobGranules_ = granules;
+					for (BlobGranuleRestoreVersion granule : granules) {
+						TraceEvent("RestorableGranule")
+						    .detail("GranuleId", granule.granuleID.toString())
+						    .detail("KeyRange", granule.keyRange.toString())
+						    .detail("Version", granule.version)
+						    .detail("SizeInBytes", granule.sizeInBytes);
+					}
+					return Void();
+				}
+			}
+			wait(delay(SERVER_KNOBS->BLOB_MIGRATOR_CHECK_INTERVAL));
+		}
+	}
+
 	// Prepare for data migration for given key range.
 	ACTOR static Future<Void> prepare(Reference<BlobMigrator> self, KeyRangeRef keys) {
 		// Register as a storage server, so that DataDistributor could start data movement after

@@ -136,8 +152,9 @@ private:
 				}
 			}
 			if (owning) {
-				dprint("Unassign {} from storage server {}\n", keys.toString(), id.toString());
 				wait(krmSetRange(&tr, serverKeysPrefixFor(id), keys, serverKeysFalse));
+				dprint("Unassign {} from storage server {}\n", keys.toString(), id.toString());
+				TraceEvent("UnassignKeys").detail("Keys", keys.toString()).detail("From", id.toString());
 			}
 		}
 		wait(tr.commit());

@@ -185,8 +202,10 @@ private:
 			// Calculated progress
 			int64_t total = sizeInBytes(self);
 			int progress = (total - incompleted) * 100 / total;
-			bool done = incompleted == 0;
-			dprint("Progress {} :{}%. done {}\n", serverID.toString(), progress, done);
+			state bool done = incompleted == 0;
+			dprint("Migration progress :{}%. done {}\n", progress, done);
+			TraceEvent("BlobMigratorProgress").detail("Progress", progress).detail("Done", done);
+			wait(updateProgress(self, normalKeys, progress));
 			return done;
 		} catch (Error& e) {
 			wait(tr.onError(e));

@@ -194,6 +213,32 @@ private:
 		}
 	}
 
+	// Update restore progress
+	ACTOR static Future<Void> updateProgress(Reference<BlobMigrator> self, KeyRangeRef range, int progress) {
+		state Transaction tr(self->db_);
+		loop {
+			try {
+				tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+				tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+				tr.setOption(FDBTransactionOptions::LOCK_AWARE);
+				state Key key = blobRestoreCommandKeyFor(range);
+				Optional<Value> value = wait(tr.get(key));
+				if (value.present()) {
+					Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(value.get());
+					if (progress > status.progress) {
+						status.progress = progress;
+						Value updatedValue = blobRestoreCommandValueFor(status);
+						tr.set(key, updatedValue);
+						wait(tr.commit());
+					}
+				}
+				return Void();
+			} catch (Error& e) {
+				wait(tr.onError(e));
+			}
+		}
+	}
+
 	// Advance version, so that future commits will have a larger version than the restored data
 	ACTOR static Future<Void> advanceVersion(Reference<BlobMigrator> self) {
 		state Transaction tr(self->db_);

@@ -207,6 +252,7 @@ private:
 			if (currentVersion <= expectedVersion) {
 				tr.set(minRequiredCommitVersionKey, BinaryWriter::toValue(expectedVersion + 1, Unversioned()));
 				dprint("Advance version from {} to {}\n", currentVersion, expectedVersion);
+				TraceEvent("AdvanceVersion").detail("Current", currentVersion).detail("New", expectedVersion);
 				wait(tr.commit());
 			}
 			return Void();

@@ -218,7 +264,7 @@ private:
 
 	// Main server loop
 	ACTOR static Future<Void> serverLoop(Reference<BlobMigrator> self) {
-		self->actors_.add(waitFailureServer(self->interf_.ssi.waitFailure.getFuture()));
+		self->actors_.add(waitFailureServer(self->interf_.waitFailure.getFuture()));
 		self->actors_.add(logProgress(self));
 		self->actors_.add(handleRequest(self));
 		self->actors_.add(handleUnsupportedRequest(self));

@@ -226,6 +272,7 @@ private:
 		try {
 			choose {
 				when(HaltBlobMigratorRequest req = waitNext(self->interf_.haltBlobMigrator.getFuture())) {
+					dprint("Stopping blob migrator {}\n", self->interf_.id().toString());
 					req.reply.send(Void());
 					TraceEvent("BlobMigratorHalted", self->interf_.id()).detail("ReqID", req.requesterID);
 					break;

@@ -237,6 +284,8 @@ private:
 			throw;
 		}
 	}
+	self->actors_.clear(true);
+	dprint("Stopped blob migrator {}\n", self->interf_.id().toString());
 	return Void();
 }

@@ -267,7 +316,7 @@ private:
req.reply.send(rep);
|
req.reply.send(rep);
|
||||||
}
|
}
|
||||||
when(GetStorageMetricsRequest req = waitNext(ssi.getStorageMetrics.getFuture())) {
|
when(GetStorageMetricsRequest req = waitNext(ssi.getStorageMetrics.getFuture())) {
|
||||||
fmt::print("Handle GetStorageMetrics\n");
|
// fmt::print("Handle GetStorageMetrics\n");
|
||||||
StorageMetrics metrics;
|
StorageMetrics metrics;
|
||||||
metrics.bytes = sizeInBytes(self);
|
metrics.bytes = sizeInBytes(self);
|
||||||
GetStorageMetricsReply resp;
|
GetStorageMetricsReply resp;
|
||||||
|
@ -331,7 +380,7 @@ private:
|
||||||
req.reply.sendError(unsupported_operation());
|
req.reply.sendError(unsupported_operation());
|
||||||
}
|
}
|
||||||
when(UpdateCommitCostRequest req = waitNext(ssi.updateCommitCostRequest.getFuture())) {
|
when(UpdateCommitCostRequest req = waitNext(ssi.updateCommitCostRequest.getFuture())) {
|
||||||
dprint("Unsupported UpdateCommitCostRequest\n");
|
// dprint("Unsupported UpdateCommitCostRequest\n");
|
||||||
req.reply.sendError(unsupported_operation());
|
req.reply.sendError(unsupported_operation());
|
||||||
}
|
}
|
||||||
when(FetchCheckpointKeyValuesRequest req = waitNext(ssi.fetchCheckpointKeyValues.getFuture())) {
|
when(FetchCheckpointKeyValuesRequest req = waitNext(ssi.fetchCheckpointKeyValues.getFuture())) {
|
||||||
|
@ -358,9 +407,9 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> processStorageQueuingMetricsRequest(StorageQueuingMetricsRequest req) {
|
ACTOR static Future<Void> processStorageQueuingMetricsRequest(StorageQueuingMetricsRequest req) {
|
||||||
dprint("Unsupported StorageQueuingMetricsRequest\n");
|
// dprint("Unsupported StorageQueuingMetricsRequest\n");
|
||||||
// FIXME get rid of this delay. it's a temp solution to avoid starvaion scheduling of DD
|
// FIXME get rid of this delay. it's a temp solution to avoid starvaion scheduling of DD
|
||||||
// processes
|
// processes
|
||||||
wait(delay(1));
|
wait(delay(1));
|
||||||
req.reply.sendError(unsupported_operation());
|
req.reply.sendError(unsupported_operation());
|
||||||
return Void();
|
return Void();
|
||||||
|
@ -398,7 +447,8 @@ private:
|
||||||
|
|
||||||
// Main entry point
|
// Main entry point
|
||||||
ACTOR Future<Void> blobMigrator(BlobMigratorInterface interf, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
|
ACTOR Future<Void> blobMigrator(BlobMigratorInterface interf, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
|
||||||
fmt::print("Start blob migrator {} \n", interf.id().toString());
|
TraceEvent("StartBlobMigrator").detail("Interface", interf.id().toString());
|
||||||
|
dprint("Starting blob migrator {}\n", interf.id().toString());
|
||||||
try {
|
try {
|
||||||
Reference<BlobMigrator> self = makeReference<BlobMigrator>(dbInfo, interf);
|
Reference<BlobMigrator> self = makeReference<BlobMigrator>(dbInfo, interf);
|
||||||
wait(BlobMigrator::start(self));
|
wait(BlobMigrator::start(self));
|
||||||
|
|
|
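An illustrative aside, not part of the commit: the progress math used by logProgress and updateProgress above is easy to sanity-check in isolation. A minimal sketch with hypothetical helper names follows; it mirrors the percentage calculation and the only-move-forward guard on persisted progress.

// Illustrative sketch only; helper names are hypothetical.
#include <algorithm>
#include <cstdint>

// Same arithmetic as `int progress = (total - incompleted) * 100 / total;`
int computeProgressPercent(int64_t total, int64_t incompleted) {
    if (total <= 0) {
        return 100; // nothing left to migrate
    }
    return static_cast<int>((total - incompleted) * 100 / total);
}

// updateProgress only persists a new value when it is larger than the stored one,
// so restarts or stale writers can never move the reported progress backwards.
int mergeProgress(int persisted, int computed) {
    return std::max(persisted, computed);
}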
@@ -292,6 +292,8 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted<BlobWorkerData> {
     int64_t lastResidentMemory = 0;
     double lastResidentMemoryCheckTime = -100.0;

+    bool isFullRestoreMode = false;
+
     BlobWorkerData(UID id, Reference<AsyncVar<ServerDBInfo> const> dbInfo, Database db)
       : id(id), db(db), tenantData(BGTenantMap(dbInfo)), dbInfo(dbInfo),
         initialSnapshotLock(new FlowLock(SERVER_KNOBS->BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM)),

@@ -2146,7 +2148,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
     }

     // No need to start Change Feed in full restore mode
-    if (isFullRestoreMode())
+    if (bwData->isFullRestoreMode)
         return Void();

     checkMergeCandidate = granuleCheckMergeCandidate(bwData,

@@ -3588,7 +3590,7 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
             state Reference<GranuleMetadata> metadata = m;
             // state Version granuleBeginVersion = req.beginVersion;
             // skip waiting for CF ready for recovery mode
-            if (!isFullRestoreMode()) {
+            if (!bwData->isFullRestoreMode) {
                 choose {
                     when(wait(metadata->readable.getFuture())) {}
                     when(wait(metadata->cancelled.getFuture())) { throw wrong_shard_server(); }

@@ -3646,7 +3648,7 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
                 // this is an active granule query
                 loop {
                     // skip check since CF doesn't start for bare metal recovery mode
-                    if (isFullRestoreMode()) {
+                    if (bwData->isFullRestoreMode) {
                         break;
                     }
                     if (!metadata->activeCFData.get().isValid() || !metadata->cancelled.canBeSet()) {

@@ -3689,7 +3691,7 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
                 // if feed was popped by another worker and BW only got empty versions, it wouldn't itself see that it
                 // got popped, but we can still reject the in theory this should never happen with other protections but
                 // it's a useful and inexpensive sanity check
-                if (!isFullRestoreMode()) {
+                if (!bwData->isFullRestoreMode) {
                     Version emptyVersion = metadata->activeCFData.get()->popVersion - 1;
                     if (req.readVersion > metadata->durableDeltaVersion.get() &&
                         emptyVersion > metadata->bufferedDeltaVersion) {

@@ -3995,6 +3997,9 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
                 throw granule_assignment_conflict();
             }

+            bool isFullRestore = wait(isFullRestoreMode(bwData->db, req.keyRange));
+            bwData->isFullRestoreMode = isFullRestore;
+
             Optional<Value> prevLockValue = wait(fLockValue);
             state bool hasPrevOwner = prevLockValue.present();
             state bool createChangeFeed = false;

@@ -4069,7 +4074,7 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
             }

             // for recovery mode - don't create change feed, don't create snapshot
-            if (isFullRestoreMode()) {
+            if (bwData->isFullRestoreMode) {
                 createChangeFeed = false;
                 info.doSnapshot = false;
                 GranuleFiles granuleFiles = wait(loadPreviousFiles(&tr, info.granuleID));

@@ -4091,7 +4096,7 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
                 }
             }

-            if (createChangeFeed && !isFullRestoreMode()) {
+            if (createChangeFeed && !bwData->isFullRestoreMode) {
                 // create new change feed for new version of granule
                 wait(updateChangeFeed(
                     &tr, granuleIDToCFKey(info.granuleID), ChangeFeedStatus::CHANGE_FEED_CREATE, req.keyRange));

@@ -4103,7 +4108,8 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
             // If anything in previousGranules, need to do the handoff logic and set
             // ret.previousChangeFeedId, and the previous durable version will come from the previous
             // granules
-            if (info.history.present() && info.history.get().value.parentVersions.size() > 0 && !isFullRestoreMode()) {
+            if (info.history.present() && info.history.get().value.parentVersions.size() > 0 &&
+                !bwData->isFullRestoreMode) {
                 CODE_PROBE(true, "Granule open found parent");
                 if (info.history.get().value.parentVersions.size() == 1) { // split
                     state KeyRangeRef parentRange(info.history.get().value.parentBoundaries[0],
@@ -23,6 +23,7 @@
 #include <map>
 #include <memory>
 #include <set>
+#include <tuple>
 #include <vector>

 #include "fdbclient/FDBTypes.h"

@@ -691,7 +692,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
     WorkerDetails newMGWorker;
     if (self->db.blobGranulesEnabled.get()) {
         newBMWorker = findNewProcessForSingleton(self, ProcessClass::BlobManager, id_used);
-        if (isFullRestoreMode()) {
+        if (self->db.blobRestoreEnabled.get()) {
             newMGWorker = findNewProcessForSingleton(self, ProcessClass::BlobMigrator, id_used);
         }
     }

@@ -710,7 +711,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
     ProcessClass::Fitness bestFitnessForMG;
     if (self->db.blobGranulesEnabled.get()) {
         bestFitnessForBM = findBestFitnessForSingleton(self, newBMWorker, ProcessClass::BlobManager);
-        if (isFullRestoreMode()) {
+        if (self->db.blobRestoreEnabled.get()) {
             bestFitnessForMG = findBestFitnessForSingleton(self, newMGWorker, ProcessClass::BlobManager);
         }
     }

@@ -744,7 +745,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
     if (self->db.blobGranulesEnabled.get()) {
         bmHealthy = isHealthySingleton<BlobManagerInterface>(
             self, newBMWorker, bmSingleton, bestFitnessForBM, self->recruitingBlobManagerID);
-        if (isFullRestoreMode()) {
+        if (self->db.blobRestoreEnabled.get()) {
             mgHealthy = isHealthySingleton<BlobMigratorInterface>(
                 self, newMGWorker, mgSingleton, bestFitnessForMG, self->recruitingBlobMigratorID);
         }

@@ -775,7 +776,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
     if (self->db.blobGranulesEnabled.get()) {
         currBMProcessId = bmSingleton.interface.get().locality.processId();
         newBMProcessId = newBMWorker.interf.locality.processId();
-        if (isFullRestoreMode()) {
+        if (self->db.blobRestoreEnabled.get()) {
             currMGProcessId = mgSingleton.interface.get().locality.processId();
             newMGProcessId = newMGWorker.interf.locality.processId();
         }

@@ -792,7 +793,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
     if (self->db.blobGranulesEnabled.get()) {
         currPids.emplace_back(currBMProcessId);
         newPids.emplace_back(newBMProcessId);
-        if (isFullRestoreMode()) {
+        if (self->db.blobRestoreEnabled.get()) {
             currPids.emplace_back(currMGProcessId);
             newPids.emplace_back(newMGProcessId);
         }

@@ -810,7 +811,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
     if (!self->db.blobGranulesEnabled.get()) {
         ASSERT(currColocMap[currBMProcessId] == 0);
         ASSERT(newColocMap[newBMProcessId] == 0);
-        if (isFullRestoreMode()) {
+        if (self->db.blobRestoreEnabled.get()) {
             ASSERT(currColocMap[currMGProcessId] == 0);
             ASSERT(newColocMap[newMGProcessId] == 0);
         }

@@ -836,7 +837,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
         ddSingleton.recruit(self);
     } else if (self->db.blobGranulesEnabled.get() && newColocMap[newBMProcessId] < currColocMap[currBMProcessId]) {
         bmSingleton.recruit(self);
-    } else if (self->db.blobGranulesEnabled.get() && isFullRestoreMode() &&
+    } else if (self->db.blobGranulesEnabled.get() && self->db.blobRestoreEnabled.get() &&
                newColocMap[newMGProcessId] < currColocMap[currMGProcessId]) {
         mgSingleton.recruit(self);
     } else if (SERVER_KNOBS->ENABLE_ENCRYPTION && newColocMap[newEKPProcessId] < currColocMap[currEKPProcessId]) {

@@ -1404,13 +1405,13 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
             self, w, currSingleton, registeringSingleton, self->recruitingRatekeeperID);
     }

-    if (self->db.blobGranulesEnabled.get() && isFullRestoreMode() && req.blobManagerInterf.present()) {
+    if (self->db.blobGranulesEnabled.get() && req.blobManagerInterf.present()) {
         auto currSingleton = BlobManagerSingleton(self->db.serverInfo->get().blobManager);
         auto registeringSingleton = BlobManagerSingleton(req.blobManagerInterf);
         haltRegisteringOrCurrentSingleton<BlobManagerInterface>(
             self, w, currSingleton, registeringSingleton, self->recruitingBlobManagerID);
     }
-    if (req.blobMigratorInterf.present()) {
+    if (req.blobMigratorInterf.present() && self->db.blobRestoreEnabled.get()) {
         auto currSingleton = BlobMigratorSingleton(self->db.serverInfo->get().blobMigrator);
         auto registeringSingleton = BlobMigratorSingleton(req.blobMigratorInterf);
         haltRegisteringOrCurrentSingleton<BlobMigratorInterface>(

@@ -2553,6 +2554,43 @@ ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
     }
 }

+ACTOR Future<Void> watchBlobRestoreCommand(ClusterControllerData* self) {
+    state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
+    state Key blobRestoreCommandKey = blobRestoreCommandKeyFor(normalKeys);
+    loop {
+        try {
+            tr->reset();
+            tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+            tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+            Optional<Value> blobRestoreCommand = wait(tr->get(blobRestoreCommandKey));
+            if (blobRestoreCommand.present()) {
+                Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(blobRestoreCommand.get());
+                TraceEvent("WatchBlobRestoreCommand").detail("Progress", status.progress);
+                if (status.progress == 0) {
+                    self->db.blobRestoreEnabled.set(true);
+                    if (self->db.blobGranulesEnabled.get()) {
+                        const auto& blobManager = self->db.serverInfo->get().blobManager;
+                        if (blobManager.present()) {
+                            BlobManagerSingleton(blobManager)
+                                .haltBlobGranules(self, blobManager.get().locality.processId());
+                        }
+                        const auto& blobMigrator = self->db.serverInfo->get().blobMigrator;
+                        if (blobMigrator.present()) {
+                            BlobMigratorSingleton(blobMigrator).halt(self, blobMigrator.get().locality.processId());
+                        }
+                    }
+                }
+            }
+
+            state Future<Void> watch = tr->watch(blobRestoreCommandKey);
+            wait(tr->commit());
+            wait(watch);
+        } catch (Error& e) {
+            wait(tr->onError(e));
+        }
+    }
+}
+
 ACTOR Future<Void> startBlobMigrator(ClusterControllerData* self, double waitTime) {
     // If master fails at the same time, give it a chance to clear master PID.
     // Also wait to avoid too many consecutive recruits in a small time window.

@@ -2629,9 +2667,8 @@ ACTOR Future<Void> monitorBlobMigrator(ClusterControllerData* self) {
     }
     loop {
         if (self->db.serverInfo->get().blobMigrator.present() && !self->recruitBlobMigrator.get()) {
-            state Future<Void> wfClient =
-                waitFailureClient(self->db.serverInfo->get().blobMigrator.get().ssi.waitFailure,
-                                  SERVER_KNOBS->BLOB_MIGRATOR_FAILURE_TIME);
+            state Future<Void> wfClient = waitFailureClient(self->db.serverInfo->get().blobMigrator.get().waitFailure,
+                                                            SERVER_KNOBS->BLOB_MIGRATOR_FAILURE_TIME);
             loop {
                 choose {
                     when(wait(wfClient)) {

@@ -2643,11 +2680,11 @@ ACTOR Future<Void> monitorBlobMigrator(ClusterControllerData* self) {
                     when(wait(self->recruitBlobMigrator.onChange())) {}
                 }
             }
-        } else if (self->db.blobGranulesEnabled.get() && isFullRestoreMode()) {
+        } else if (self->db.blobGranulesEnabled.get() && self->db.blobRestoreEnabled.get()) {
            // if there is no blob migrator present but blob granules are now enabled, recruit a BM
            wait(startBlobMigrator(self, recruitThrottler.newRecruitment()));
         } else {
-            wait(self->db.blobGranulesEnabled.onChange());
+            wait(self->db.blobGranulesEnabled.onChange() || self->db.blobRestoreEnabled.onChange());
         }
     }
 }

@@ -2778,7 +2815,7 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
                     const auto& blobManager = self->db.serverInfo->get().blobManager;
                     BlobManagerSingleton(blobManager)
                         .haltBlobGranules(self, blobManager.get().locality.processId());
-                    if (isFullRestoreMode()) {
+                    if (self->db.blobRestoreEnabled.get()) {
                         const auto& blobMigrator = self->db.serverInfo->get().blobMigrator;
                         BlobMigratorSingleton(blobMigrator).halt(self, blobMigrator.get().locality.processId());
                     }

@@ -3079,8 +3116,9 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
     self.addActor.send(monitorDataDistributor(&self));
     self.addActor.send(monitorRatekeeper(&self));
     self.addActor.send(monitorBlobManager(&self));
-    self.addActor.send(monitorBlobMigrator(&self));
     self.addActor.send(watchBlobGranulesConfigKey(&self));
+    self.addActor.send(monitorBlobMigrator(&self));
+    self.addActor.send(watchBlobRestoreCommand(&self));
     self.addActor.send(monitorConsistencyScan(&self));
     self.addActor.send(metaclusterMetricsUpdater(&self));
     self.addActor.send(dbInfoUpdater(&self));
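An illustrative aside, not part of the commit: watchBlobRestoreCommand above follows the standard FoundationDB watch loop, in which the watch only becomes active once the transaction commits and the actor then sleeps until the key changes. A generic sketch of that loop, with a hypothetical function name and callback (usual fdbclient/flow includes and actorcompiler.h boilerplate omitted):

// Sketch only: the generic watch loop the new actor follows; names are placeholders.
ACTOR Future<Void> watchKeyLoop(Database db, Key key, std::function<void(Optional<Value>)> onValue) {
    state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
    loop {
        try {
            tr->reset();
            tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            Optional<Value> value = wait(tr->get(key));
            onValue(value); // react to the current value before re-arming the watch
            state Future<Void> watch = tr->watch(key);
            wait(tr->commit()); // the watch is only armed once the transaction commits
            wait(watch);        // wakes up when the key's value changes
        } catch (Error& e) {
            wait(tr->onError(e));
        }
    }
}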
@@ -414,7 +414,8 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
             }

             Optional<TenantNameRef> const& tenantName = req.tenantInfo.name;
-            if (tenantName.present() && commitData->tenantsOverStorageQuota.count(tenantName.get()) > 0) {
+            if (SERVER_KNOBS->STORAGE_QUOTA_ENABLED && tenantName.present() &&
+                commitData->tenantsOverStorageQuota.count(tenantName.get()) > 0) {
                 req.reply.sendError(storage_quota_exceeded());
                 continue;
             }

@@ -829,7 +830,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
         SERVER_KNOBS->PROXY_REJECT_BATCH_QUEUED_TOO_LONG && canReject(trs)) {
         // Disabled for the recovery transaction. otherwise, recovery can't finish and keeps doing more recoveries.
         CODE_PROBE(true, "Reject transactions in the batch");
-        TraceEvent(SevWarnAlways, "ProxyReject", pProxyCommitData->dbgid)
+        TraceEvent(g_network->isSimulated() ? SevInfo : SevWarnAlways, "ProxyReject", pProxyCommitData->dbgid)
             .suppressFor(0.1)
             .detail("QDelay", queuingDelay)
             .detail("Transactions", trs.size())

@@ -2971,7 +2972,9 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
                                          proxy.expireIdempotencyId,
                                          commitData.expectedIdempotencyIdCountForKey,
                                          &commitData.idempotencyClears));
-    addActor.send(monitorTenantsOverStorageQuota(proxy.id(), db, &commitData));
+    if (SERVER_KNOBS->STORAGE_QUOTA_ENABLED) {
+        addActor.send(monitorTenantsOverStorageQuota(proxy.id(), db, &commitData));
+    }

     // wait for txnStateStore recovery
     wait(success(commitData.txnStateStore->readValue(StringRef())));
@@ -1423,6 +1423,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
     state double startTime = now();
     state std::vector<UID> destIds;
     state uint64_t debugID = deterministicRandom()->randomUInt64();
+    state bool enableShardMove = SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD;

     try {
         if (now() - self->lastInterval < 1.0) {

@@ -1539,8 +1540,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
                     req.src = rd.src;
                     req.completeSources = rd.completeSources;

-                    if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
-                        tciIndex == 1) {
+                    if (enableShardMove && tciIndex == 1) {
                         ASSERT(physicalShardIDCandidate != UID().first() &&
                                physicalShardIDCandidate != anonymousShardId.first());
                         Optional<ShardsAffectedByTeamFailure::Team> remoteTeamWithPhysicalShard =

@@ -1587,64 +1587,58 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
                         anyWithSource = true;
                     }

-                    if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
-                        // critical to the correctness of team selection by PhysicalShardCollection
-                        // tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary
-                        // team Thus, tryGetAvailableRemoteTeamWith() may select an almost full remote team In this
-                        // case, we must re-select a remote team We set foundTeams = false to avoid finishing team
-                        // selection Then, forceToUseNewPhysicalShard is set, which enforce to use getTeam to select
-                        // a remote team
+                    if (enableShardMove) {
                         if (tciIndex == 1 && !forceToUseNewPhysicalShard) {
+                            // critical to the correctness of team selection by PhysicalShardCollection
+                            // tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary
+                            // team Thus, tryGetAvailableRemoteTeamWith() may select an almost full remote team In
+                            // this case, we must re-select a remote team We set foundTeams = false to avoid
+                            // finishing team selection Then, forceToUseNewPhysicalShard is set, which enforce to
+                            // use getTeam to select a remote team
                             bool minAvailableSpaceRatio = bestTeam.first.get()->getMinAvailableSpaceRatio(true);
                             if (minAvailableSpaceRatio < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
                                 retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsFull;
                                 foundTeams = false;
                                 break;
                             }
-                        }
-                    }

-                    if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
+                            // critical to the correctness of team selection by PhysicalShardCollection
+                            // tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary
+                            // team Thus, tryGetAvailableRemoteTeamWith() may select an unhealthy remote team In
+                            // this case, we must re-select a remote team We set foundTeams = false to avoid
+                            // finishing team selection Then, forceToUseNewPhysicalShard is set, which enforce to
+                            // use getTeam to select a remote team
+                            if (!bestTeam.first.get()->isHealthy()) {
+                                retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy;
+                                foundTeams = false;
+                                break;
+                            }
+                        }
+
                         bestTeams.emplace_back(bestTeam.first.get(), true);
                         // Always set bestTeams[i].second = true to disable optimization in data move between DCs
                         // for the correctness of PhysicalShardCollection
                         // Currently, enabling the optimization will break the invariant of PhysicalShardCollection
                         // Invariant: once a physical shard is created with a specific set of SSes, this SS set will
                         // never get changed.

+                        if (tciIndex == 0) {
+                            ASSERT(foundTeams);
+                            ShardsAffectedByTeamFailure::Team primaryTeam =
+                                ShardsAffectedByTeamFailure::Team(bestTeams[0].first->getServerIDs(), true);
+                            physicalShardIDCandidate =
+                                self->physicalShardCollection->determinePhysicalShardIDGivenPrimaryTeam(
+                                    primaryTeam, metrics, forceToUseNewPhysicalShard, debugID);
+                            ASSERT(physicalShardIDCandidate != UID().first() &&
+                                   physicalShardIDCandidate != anonymousShardId.first());
+                        }
                     } else {
                         bestTeams.emplace_back(bestTeam.first.get(), bestTeam.second);
                     }

-                    // get physicalShardIDCandidate
-                    if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
-                        tciIndex == 0) {
-                        ASSERT(foundTeams);
-                        ShardsAffectedByTeamFailure::Team primaryTeam =
-                            ShardsAffectedByTeamFailure::Team(bestTeams[0].first->getServerIDs(), true);
-                        physicalShardIDCandidate =
-                            self->physicalShardCollection->determinePhysicalShardIDGivenPrimaryTeam(
-                                primaryTeam, metrics, forceToUseNewPhysicalShard, debugID);
-                        ASSERT(physicalShardIDCandidate != UID().first() &&
-                               physicalShardIDCandidate != anonymousShardId.first());
-                    }
                 }
                 tciIndex++;
             }

-            // critical to the correctness of team selection by PhysicalShardCollection
-            // tryGetAvailableRemoteTeamWith() enforce to select a remote team paired with a primary team
-            // Thus, tryGetAvailableRemoteTeamWith() may select an unhealthy remote team
-            // In this case, we must re-select a remote team
-            // We set foundTeams = false to avoid finishing team selection
-            // Then, forceToUseNewPhysicalShard is set, which enforce to use getTeam to select a remote team
-            if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
-                bestTeams.size() > 1 && !forceToUseNewPhysicalShard) {
-                if (!bestTeams[1].first->isHealthy()) {
-                    retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy;
-                    foundTeams = false;
-                }
-            }
-
             // once we've found healthy candidate teams, make sure they're not overloaded with outstanding moves
             // already
             anyDestOverloaded = !canLaunchDest(bestTeams, rd.priority, self->destBusymap);

@@ -1665,7 +1659,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
                     .detail("AnyDestOverloaded", anyDestOverloaded)
                     .detail("NumOfTeamCollections", self->teamCollections.size())
                     .detail("Servers", destServersString(bestTeams));
-                if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
+                if (enableShardMove) {
                     if (rd.isRestore() && destOverloadedCount > 50) {
                         throw data_move_dest_team_not_found();
                     }

@@ -1689,14 +1683,14 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
                 // When forceToUseNewPhysicalShard = false, we get paired primary team and remote team
                 // However, this may be failed
                 // Any retry triggers to use new physicalShard which enters the normal routine
-                if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
+                if (enableShardMove) {
                     forceToUseNewPhysicalShard = true;
                 }

                 // TODO different trace event + knob for overloaded? Could wait on an async var for done moves
             }

-            if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
+            if (enableShardMove) {
                 if (!rd.isRestore()) {
                     // when !rd.isRestore(), dataMoveId is just decided as physicalShardIDCandidate
                     // thus, update the physicalShardIDCandidate to related data structures

@@ -1954,7 +1948,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
         self->shardsAffectedByTeamFailure->finishMove(rd.keys);
         relocationComplete.send(rd);

-        if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
+        if (enableShardMove) {
             // update physical shard collection
             std::vector<ShardsAffectedByTeamFailure::Team> selectedTeams;
             for (int i = 0; i < bestTeams.size(); i++) {
@@ -588,7 +588,6 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
     state Reference<DDTeamCollection> primaryTeamCollection;
     state Reference<DDTeamCollection> remoteTeamCollection;
     state bool trackerCancelled;
-    state bool ddIsTenantAware = SERVER_KNOBS->DD_TENANT_AWARENESS_ENABLED;
     loop {
         trackerCancelled = false;
         self->initialized = Promise<Void>();

@@ -610,7 +609,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
             state Reference<AsyncVar<bool>> processingUnhealthy(new AsyncVar<bool>(false));
             state Reference<AsyncVar<bool>> processingWiggle(new AsyncVar<bool>(false));

-            if (ddIsTenantAware) {
+            if (SERVER_KNOBS->DD_TENANT_AWARENESS_ENABLED || SERVER_KNOBS->STORAGE_QUOTA_ENABLED) {
                 self->ddTenantCache = makeReference<TenantCache>(cx, self->ddId);
                 wait(self->ddTenantCache.get()->build());
             }

@@ -684,6 +683,8 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
                                                     "DDTenantCacheMonitor",
                                                     self->ddId,
                                                     &normalDDQueueErrors()));
+            }
+            if (self->ddTenantCache.present() && SERVER_KNOBS->STORAGE_QUOTA_ENABLED) {
                 actors.push_back(reportErrorsExcept(self->ddTenantCache.get()->monitorStorageQuota(),
                                                     "StorageQuotaTracker",
                                                     self->ddId,

@@ -1320,7 +1321,7 @@ GetStorageWigglerStateReply getStorageWigglerStates(Reference<DataDistributor> s

 TenantsOverStorageQuotaReply getTenantsOverStorageQuota(Reference<DataDistributor> self) {
     TenantsOverStorageQuotaReply reply;
-    if (self->ddTenantCache.present()) {
+    if (self->ddTenantCache.present() && SERVER_KNOBS->STORAGE_QUOTA_ENABLED) {
         reply.tenants = self->ddTenantCache.get()->getTenantsOverQuota();
     }
     return reply;
@@ -446,11 +446,14 @@ void proxyGRVThresholdExceeded(const GetReadVersionRequest* req, GrvProxyStats*
     ++stats->txnRequestErrors;
     req->reply.sendError(grv_proxy_memory_limit_exceeded());
     if (req->priority == TransactionPriority::IMMEDIATE) {
-        TraceEvent(SevWarnAlways, "ProxyGRVThresholdExceededSystem").suppressFor(60);
+        TraceEvent(g_network->isSimulated() ? SevInfo : SevWarnAlways, "ProxyGRVThresholdExceededSystem")
+            .suppressFor(60);
     } else if (req->priority == TransactionPriority::DEFAULT) {
-        TraceEvent(SevWarnAlways, "ProxyGRVThresholdExceededDefault").suppressFor(60);
+        TraceEvent(g_network->isSimulated() ? SevInfo : SevWarnAlways, "ProxyGRVThresholdExceededDefault")
+            .suppressFor(60);
     } else {
-        TraceEvent(SevWarnAlways, "ProxyGRVThresholdExceededBatch").suppressFor(60);
+        TraceEvent(g_network->isSimulated() ? SevInfo : SevWarnAlways, "ProxyGRVThresholdExceededBatch")
+            .suppressFor(60);
     }
 }

@@ -58,6 +58,14 @@ void GrvProxyTagThrottler::TagQueue::rejectRequests(LatencyBandsMap& latencyBand
     }
 }

+void GrvProxyTagThrottler::TagQueue::endReleaseWindow(int64_t numStarted, double elapsed) {
+    if (rateInfo.present()) {
+        CODE_PROBE(requests.empty(), "Tag queue ending release window with empty request queue");
+        CODE_PROBE(!requests.empty(), "Tag queue ending release window with requests still queued");
+        rateInfo.get().endReleaseWindow(numStarted, requests.empty(), elapsed);
+    }
+}
+
 GrvProxyTagThrottler::GrvProxyTagThrottler(double maxThrottleDuration)
   : maxThrottleDuration(maxThrottleDuration),
     latencyBandsMap("GrvProxyTagThrottler",

@@ -202,16 +210,14 @@ void GrvProxyTagThrottler::releaseTransactions(double elapsed,
         }
     }

-    // End release windows for queues with valid rateInfo
+    // End release windows for all tag queues
     {
         TransactionTagMap<uint32_t> transactionsReleasedMap;
         for (const auto& [tag, count] : transactionsReleased) {
             transactionsReleasedMap[tag] = count;
         }
         for (auto& [tag, queue] : queues) {
-            if (queue.rateInfo.present()) {
-                queue.rateInfo.get().endReleaseWindow(transactionsReleasedMap[tag], false, elapsed);
-            }
+            queue.endReleaseWindow(transactionsReleasedMap[tag], elapsed);
         }
     }
     // If the capacity is increased, that means the vector has been illegally resized, potentially

@@ -438,3 +444,33 @@ TEST_CASE("/GrvProxyTagThrottler/Fifo") {
     wait(mockFifoClient(&throttler));
     return Void();
 }
+
+// Tests that while throughput is low, the tag throttler
+// does not accumulate too much budget.
+//
+// A server is setup to server 10 transactions per second,
+// then runs idly for 60 seconds. Then a client starts
+// and attempts 20 transactions per second for 60 seconds.
+// The server throttles the client to only achieve
+// 10 transactions per second during this 60 second window.
+// If the throttler is allowed to accumulate budget indefinitely
+// during the idle 60 seconds, this test will fail.
+TEST_CASE("/GrvProxyTagThrottler/LimitedIdleBudget") {
+    state GrvProxyTagThrottler throttler(5.0);
+    state TagSet tagSet;
+    state TransactionTagMap<uint32_t> counters;
+    {
+        TransactionTagMap<double> rates;
+        rates["sampleTag"_sr] = 10.0;
+        throttler.updateRates(rates);
+    }
+    tagSet.addTag("sampleTag"_sr);
+
+    state Future<Void> server = mockServer(&throttler);
+    wait(delay(60.0));
+    state Future<Void> client = mockClient(&throttler, TransactionPriority::DEFAULT, tagSet, 1, 20.0, &counters);
+    wait(timeout(client && server, 60.0, Void()));
+    TraceEvent("TagQuotaTest_LimitedIdleBudget").detail("Counter", counters["sampleTag"_sr]);
+    ASSERT(isNear(counters["sampleTag"_sr], 60.0 * 10.0));
+    return Void();
+}
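An illustrative aside, not part of the commit: the expected count asserted by the LimitedIdleBudget test above is just rate times window, 60.0 s * 10.0 tx/s = 600 transactions. If the idle 60 seconds were allowed to accumulate budget with no empty-queue cap, the client's first release windows could burst well past that 600-transaction fair share, and the isNear check would fail.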
@@ -35,7 +35,7 @@ bool GrvTransactionRateInfo::canStart(int64_t numAlreadyStarted, int64_t count)
            std::min(limit + budget, SERVER_KNOBS->START_TRANSACTION_MAX_TRANSACTIONS_TO_START);
 }

-void GrvTransactionRateInfo::endReleaseWindow(int64_t numStartedAtPriority, bool queueEmptyAtPriority, double elapsed) {
+void GrvTransactionRateInfo::endReleaseWindow(int64_t numStarted, bool queueEmpty, double elapsed) {
     // Update the budget to accumulate any extra capacity available or remove any excess that was used.
     // The actual delta is the portion of the limit we didn't use multiplied by the fraction of the rate window that
     // elapsed.

@@ -52,16 +52,15 @@ void GrvTransactionRateInfo::endReleaseWindow(int64_t numStartedAtPriority, bool
     //
     // Note that "rate window" here indicates a period of SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW seconds,
     // whereas "release window" is the period between wait statements, with duration indicated by "elapsed."
-    budget =
-        std::max(0.0, budget + elapsed * (limit - numStartedAtPriority) / SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW);
+    budget = std::max(0.0, budget + elapsed * (limit - numStarted) / SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW);

     // If we are emptying out the queue of requests, then we don't need to carry much budget forward
     // If we did keep accumulating budget, then our responsiveness to changes in workflow could be compromised
-    if (queueEmptyAtPriority) {
+    if (queueEmpty) {
         budget = std::min(budget, SERVER_KNOBS->START_TRANSACTION_MAX_EMPTY_QUEUE_BUDGET);
     }

-    smoothReleased.addDelta(numStartedAtPriority);
+    smoothReleased.addDelta(numStarted);
 }

 void GrvTransactionRateInfo::disable() {
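An illustrative aside, not part of the commit: a worked example of the budget update in endReleaseWindow above, using made-up numbers (the real values come from the START_TRANSACTION_* knobs).

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    // Assume a 1.0 s rate window, limit = 100 transactions per window, and a
    // release window of elapsed = 0.1 s in which numStarted = 60 transactions
    // were actually released.
    double budget = 2.0;
    const double limit = 100.0, elapsed = 0.1, rateWindow = 1.0;
    const int64_t numStarted = 60;
    budget = std::max(0.0, budget + elapsed * (limit - numStarted) / rateWindow); // 2.0 + 0.1 * 40 = 6.0

    // If the queue drained during the window, clamp the carried budget so long
    // idle periods cannot hoard capacity (5.0 is a stand-in for the
    // START_TRANSACTION_MAX_EMPTY_QUEUE_BUDGET knob).
    const bool queueEmpty = true;
    if (queueEmpty) {
        budget = std::min(budget, 5.0); // budget is now 5.0
    }
    std::printf("carried budget: %.1f\n", budget);
    return 0;
}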
@@ -391,9 +391,16 @@ struct Counters {
     CounterCollection cc;
     Counter immediateThrottle;
     Counter failedToAcquire;
+    Counter deleteKeyReqs;
+    Counter deleteRangeReqs;
+    Counter convertedDeleteKeyReqs;
+    Counter convertedDeleteRangeReqs;

     Counters()
-      : cc("RocksDBThrottle"), immediateThrottle("ImmediateThrottle", cc), failedToAcquire("FailedToAcquire", cc) {}
+      : cc("RocksDBThrottle"), immediateThrottle("ImmediateThrottle", cc), failedToAcquire("FailedToAcquire", cc),
+        deleteKeyReqs("DeleteKeyRequests", cc), deleteRangeReqs("DeleteRangeRequests", cc),
+        convertedDeleteKeyReqs("ConvertedDeleteKeyRequests", cc),
+        convertedDeleteRangeReqs("ConvertedDeleteRangeRequests", cc) {}
 };

 struct ReadIterator {

@@ -1934,12 +1941,17 @@ struct RocksDBKeyValueStore : IKeyValueStore {
         }

         ASSERT(defaultFdbCF != nullptr);
+        // Number of deletes to rocksdb = counters.deleteKeyReqs + convertedDeleteKeyReqs;
+        // Number of deleteRanges to rocksdb = counters.deleteRangeReqs - counters.convertedDeleteRangeReqs;
         if (keyRange.singleKeyRange()) {
             writeBatch->Delete(defaultFdbCF, toSlice(keyRange.begin));
+            ++counters.deleteKeyReqs;
         } else {
+            ++counters.deleteRangeReqs;
             if (SERVER_KNOBS->ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE && storageMetrics != nullptr &&
                 storageMetrics->byteSample.getEstimate(keyRange) <
                     SERVER_KNOBS->ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT) {
+                ++counters.convertedDeleteRangeReqs;
                 rocksdb::ReadOptions options = sharedState->getReadOptions();
                 auto beginSlice = toSlice(keyRange.begin);
                 auto endSlice = toSlice(keyRange.end);

@@ -1949,6 +1961,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
                 cursor->Seek(toSlice(keyRange.begin));
                 while (cursor->Valid() && toStringRef(cursor->key()) < keyRange.end) {
                     writeBatch->Delete(defaultFdbCF, cursor->key());
+                    ++counters.convertedDeleteKeyReqs;
                     cursor->Next();
                 }
                 if (!cursor->status().ok()) {

@@ -1958,6 +1971,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
                 auto it = keysSet.lower_bound(keyRange.begin);
                 while (it != keysSet.end() && *it < keyRange.end) {
                     writeBatch->Delete(defaultFdbCF, toSlice(*it));
+                    ++counters.convertedDeleteKeyReqs;
                     it++;
                 }
             }
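An illustrative aside, not part of the commit: the relationship the two new comments above describe, written out as helpers. This assumes Counter's usual getValue() accessor; the functions are hypothetical and only illustrate how the new counters relate to what actually reaches RocksDB.

// Sketch only. Every single-key clear issues one point Delete; every clear-range
// either stays a DeleteRange or, when small enough, is converted into N point Deletes.
int64_t rocksdbPointDeletes(const Counters& c) {
    return c.deleteKeyReqs.getValue() + c.convertedDeleteKeyReqs.getValue();
}
int64_t rocksdbRangeDeletes(const Counters& c) {
    return c.deleteRangeReqs.getValue() - c.convertedDeleteRangeReqs.getValue();
}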
@@ -289,11 +289,7 @@ ACTOR Future<Void> resolveBatch(Reference<Resolver> self,
     // Detect conflicts
     double expire = now() + SERVER_KNOBS->SAMPLE_EXPIRATION_TIME;
     ConflictBatch conflictBatch(self->conflictSet, &reply.conflictingKeyRangeMap, &reply.arena);
-    Version newOldestVersion = req.version - SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS;
-    if (g_network->isSimulated() && g_simulator->speedUpSimulation) {
-        newOldestVersion = req.version - std::max(5 * SERVER_KNOBS->VERSIONS_PER_SECOND,
-                                                  SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS);
-    }
+    const Version newOldestVersion = req.version - SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS;
     for (int t = 0; t < req.transactions.size(); t++) {
         conflictBatch.addTransaction(req.transactions[t], newOldestVersion);
         self->resolvedReadConflictRanges += req.transactions[t].read_conflict_ranges.size();
@@ -422,11 +422,12 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
         state LogMessageVersion msgVersion;
         msgVersion.version = reader.consumeNetworkUInt64();
         msgVersion.sub = reader.consumeNetworkUInt32();
-        int msgSize = reader.consumeNetworkInt32();
-        const uint8_t* message = reader.consume(msgSize);
+        state int msgSize = reader.consumeNetworkInt32();
+        state const uint8_t* message = reader.consume(msgSize);

         // Skip mutations out of the version range
         if (!asset.isInVersionRange(msgVersion.version)) {
+            wait(yield()); // avoid potential stack overflows
             continue;
         }

@@ -127,25 +127,38 @@ public:
 		loop {
 			state double fetchStartTime = now();
-			state std::vector<TenantName> tenants = tenantCache->getTenantList();
+			state std::vector<TenantGroupName> groups;
+			for (const auto& [group, storage] : tenantCache->tenantStorageMap) {
+				groups.push_back(group);
+			}
 			state int i;
-			for (i = 0; i < tenants.size(); i++) {
-				state ReadYourWritesTransaction tr(tenantCache->dbcx(), tenants[i]);
-				loop {
-					try {
-						state int64_t size = wait(tr.getEstimatedRangeSizeBytes(normalKeys));
-						tenantCache->tenantStorageMap[tenants[i]].usage = size;
-						break;
-					} catch (Error& e) {
-						if (e.code() == error_code_tenant_not_found) {
-							tenantCache->tenantStorageMap.erase(tenants[i]);
-							break;
-						} else {
-							TraceEvent("TenantCacheGetStorageUsageError", tenantCache->id()).error(e);
-							wait(tr.onError(e));
+			for (i = 0; i < groups.size(); i++) {
+				state TenantGroupName group = groups[i];
+				state int64_t usage = 0;
+				// `tenants` needs to be a copy so that the erase (below) or inserts/erases from other
+				// functions (when this actor yields) do not interfere with the iteration
+				state std::unordered_set<TenantName> tenants = tenantCache->tenantStorageMap[group].tenants;
+				state std::unordered_set<TenantName>::iterator iter = tenants.begin();
+				for (; iter != tenants.end(); iter++) {
+					state TenantName tenant = *iter;
+					state ReadYourWritesTransaction tr(tenantCache->dbcx(), tenant);
+					loop {
+						try {
+							state int64_t size = wait(tr.getEstimatedRangeSizeBytes(normalKeys));
+							usage += size;
+							break;
+						} catch (Error& e) {
+							if (e.code() == error_code_tenant_not_found) {
+								tenantCache->tenantStorageMap[group].tenants.erase(tenant);
+								break;
+							} else {
+								TraceEvent("TenantCacheGetStorageUsageError", tenantCache->id()).error(e);
+								wait(tr.onError(e));
+							}
 						}
 					}
 				}
+				tenantCache->tenantStorageMap[group].usage = usage;
 			}
 
 			lastTenantListFetchTime = now();
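Illustrative sketch only, with assumed simplified types in place of the FDB actor machinery and transactions: the per-group bookkeeping the rewritten refresh loop above performs, including iterating over a copy of the tenant set so concurrent inserts and erases cannot invalidate the iteration.

#include <cstdint>
#include <limits>
#include <string>
#include <unordered_map>
#include <unordered_set>

struct GroupStorage {
	int64_t quota = std::numeric_limits<int64_t>::max();
	int64_t usage = 0;
	std::unordered_set<std::string> tenants;
};

// Stand-in for getEstimatedRangeSizeBytes() over a tenant's key space.
using TenantSizeFn = int64_t (*)(const std::string& tenant);

void refreshUsage(std::unordered_map<std::string, GroupStorage>& groups, TenantSizeFn sizeOf) {
	for (auto& [group, storage] : groups) {
		int64_t usage = 0;
		// Iterate over a copy, mirroring the copy made in the actor above.
		std::unordered_set<std::string> tenants = storage.tenants;
		for (const auto& tenant : tenants) {
			usage += sizeOf(tenant);
		}
		storage.usage = usage; // cumulative usage for the whole group
	}
}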
@@ -162,22 +175,24 @@ public:
 		state Transaction tr(tenantCache->dbcx());
 
 		loop {
-			loop {
-				try {
-					state RangeResult currentQuotas = wait(tr.getRange(storageQuotaKeys, CLIENT_KNOBS->TOO_MANY));
-					for (auto const kv : currentQuotas) {
-						TenantName const tenant = kv.key.removePrefix(storageQuotaPrefix);
-						int64_t const quota = BinaryReader::fromStringRef<int64_t>(kv.value, Unversioned());
-						tenantCache->tenantStorageMap[tenant].quota = quota;
-					}
-					tr.reset();
-					break;
-				} catch (Error& e) {
-					TraceEvent("TenantCacheGetStorageQuotaError", tenantCache->id()).error(e);
-					wait(tr.onError(e));
-				}
+			try {
+				state RangeResult currentQuotas = wait(tr.getRange(storageQuotaKeys, CLIENT_KNOBS->TOO_MANY));
+				// Reset the quota for all groups; this essentially sets the quota to `max` for groups where the
+				// quota might have been cleared (i.e., groups that will not be returned in `getRange` request above).
+				for (auto& [group, storage] : tenantCache->tenantStorageMap) {
+					storage.quota = std::numeric_limits<int64_t>::max();
+				}
+				for (const auto kv : currentQuotas) {
+					const TenantGroupName group = kv.key.removePrefix(storageQuotaPrefix);
+					const int64_t quota = BinaryReader::fromStringRef<int64_t>(kv.value, Unversioned());
+					tenantCache->tenantStorageMap[group].quota = quota;
+				}
+				tr.reset();
+				wait(delay(SERVER_KNOBS->TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL));
+			} catch (Error& e) {
+				TraceEvent("TenantCacheGetStorageQuotaError", tenantCache->id()).error(e);
+				wait(tr.onError(e));
 			}
-			wait(delay(SERVER_KNOBS->TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL));
 		}
 	}
 };
@@ -189,6 +204,10 @@ void TenantCache::insert(TenantName& tenantName, TenantMapEntry& tenant) {
 	TenantInfo tenantInfo(tenantName, Optional<Standalone<StringRef>>(), tenant.id);
 	tenantCache[tenantPrefix] = makeReference<TCTenantInfo>(tenantInfo, tenant.prefix);
 	tenantCache[tenantPrefix]->updateCacheGeneration(generation);
+
+	if (tenant.tenantGroup.present()) {
+		tenantStorageMap[tenant.tenantGroup.get()].tenants.insert(tenantName);
+	}
 }
 
 void TenantCache::startRefresh() {
@@ -289,13 +308,13 @@ Optional<Reference<TCTenantInfo>> TenantCache::tenantOwning(KeyRef key) const {
 }
 
 std::unordered_set<TenantName> TenantCache::getTenantsOverQuota() const {
-	std::unordered_set<TenantName> tenants;
-	for (const auto& [tenant, storage] : tenantStorageMap) {
+	std::unordered_set<TenantName> tenantsOverQuota;
+	for (const auto& [tenantGroup, storage] : tenantStorageMap) {
 		if (storage.usage > storage.quota) {
-			tenants.insert(tenant);
+			tenantsOverQuota.insert(storage.tenants.begin(), storage.tenants.end());
 		}
 	}
-	return tenants;
+	return tenantsOverQuota;
 }
 
 Future<Void> TenantCache::monitorTenantMap() {
@@ -2025,7 +2025,8 @@ public:
 	          bool memoryOnly,
 	          Reference<IPageEncryptionKeyProvider> keyProvider,
 	          Promise<Void> errorPromise = {})
-	  : keyProvider(keyProvider), ioLock(FLOW_KNOBS->MAX_OUTSTANDING, SERVER_KNOBS->REDWOOD_PRIORITY_LAUNCHS),
+	  : keyProvider(keyProvider),
+	    ioLock(makeReference<PriorityMultiLock>(FLOW_KNOBS->MAX_OUTSTANDING, SERVER_KNOBS->REDWOOD_IO_PRIORITIES)),
 	    pageCacheBytes(pageCacheSizeBytes), desiredPageSize(desiredPageSize), desiredExtentSize(desiredExtentSize),
 	    filename(filename), memoryOnly(memoryOnly), errorPromise(errorPromise),
 	    remapCleanupWindowBytes(remapCleanupWindowBytes), concurrentExtentReads(new FlowLock(concurrentExtentReads)) {

@@ -2037,7 +2038,7 @@ public:
 		// This sets the page cache size for all PageCacheT instances using the same evictor
 		pageCache.evictor().sizeLimit = pageCacheBytes;
 
-		g_redwoodMetrics.ioLock = &ioLock;
+		g_redwoodMetrics.ioLock = ioLock.getPtr();
 		if (!g_redwoodMetricsActor.isValid()) {
 			g_redwoodMetricsActor = redwoodMetricsLogger();
 		}

@@ -2499,7 +2500,7 @@ public:
 	                                     unsigned int level,
 	                                     bool header) {
 
-		state PriorityMultiLock::Lock lock = wait(self->ioLock.lock(header ? ioMaxPriority : ioMinPriority));
+		state PriorityMultiLock::Lock lock = wait(self->ioLock->lock(header ? ioMaxPriority : ioMinPriority));
 		++g_redwoodMetrics.metric.pagerDiskWrite;
 		g_redwoodMetrics.level(level).metrics.events.addEventReason(PagerEvents::PageWrite, reason);
 		if (self->memoryOnly) {

@@ -2779,7 +2780,7 @@ public:
 	                                          int blockSize,
 	                                          int64_t offset,
 	                                          int priority) {
-		state PriorityMultiLock::Lock lock = wait(self->ioLock.lock(std::min(priority, ioMaxPriority)));
+		state PriorityMultiLock::Lock lock = wait(self->ioLock->lock(std::min(priority, ioMaxPriority)));
 		++g_redwoodMetrics.metric.pagerDiskRead;
 		int bytes = wait(self->pageFile->read(pageBuffer->rawData() + pageOffset, blockSize, offset));
 		return bytes;

@@ -3593,7 +3594,7 @@ public:
 
 		// The next section explicitly cancels all pending operations held in the pager
 		debug_printf("DWALPager(%s) shutdown kill ioLock\n", self->filename.c_str());
-		self->ioLock.kill();
+		self->ioLock->kill();
 
 		debug_printf("DWALPager(%s) shutdown cancel recovery\n", self->filename.c_str());
 		self->recoverFuture.cancel();

@@ -3802,7 +3803,7 @@ private:
 
 	Reference<IPageEncryptionKeyProvider> keyProvider;
 
-	PriorityMultiLock ioLock;
+	Reference<PriorityMultiLock> ioLock;
 
 	int64_t pageCacheBytes;
 
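Not part of the diff: a tiny analogy, using std::shared_ptr in place of flow's Reference<T>, for why the pager now holds the priority lock through a reference-counted handle while the global metrics object keeps only a raw pointer to it for reporting.

#include <memory>

struct PriorityLockSketch { /* runners/waiters counters, lock(), kill(), ... */ };

struct MetricsSketch {
	PriorityLockSketch* ioLock = nullptr; // non-owning view used only for reporting
};

struct PagerSketch {
	std::shared_ptr<PriorityLockSketch> ioLock = std::make_shared<PriorityLockSketch>();

	void registerMetrics(MetricsSketch& m) {
		m.ioLock = ioLock.get(); // analogous to ioLock.getPtr() in the hunk above
	}
};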
@@ -8894,32 +8895,25 @@ void RedwoodMetrics::getIOLockFields(TraceEvent* e, std::string* s) {
 	int maxPriority = ioLock->maxPriority();
 
 	if (e != nullptr) {
-		e->detail("ActiveReads", ioLock->totalRunners());
-		e->detail("AwaitReads", ioLock->totalWaiters());
+		e->detail("IOActiveTotal", ioLock->getRunnersCount());
+		e->detail("IOWaitingTotal", ioLock->getWaitersCount());
 
 		for (int priority = 0; priority <= maxPriority; ++priority) {
-			e->detail(format("ActiveP%d", priority), ioLock->numRunners(priority));
-			e->detail(format("AwaitP%d", priority), ioLock->numWaiters(priority));
+			e->detail(format("IOActiveP%d", priority), ioLock->getRunnersCount(priority));
+			e->detail(format("IOWaitingP%d", priority), ioLock->getWaitersCount(priority));
 		}
 	}
 
 	if (s != nullptr) {
-		std::string active = "Active";
-		std::string await = "Await";
-
 		*s += "\n";
-		*s += format("%-15s %-8u ", "ActiveReads", ioLock->totalRunners());
-		*s += format("%-15s %-8u ", "AwaitReads", ioLock->totalWaiters());
-		*s += "\n";
-
+		*s += format("%-15s %-8u ", "IOActiveTotal", ioLock->getRunnersCount());
 		for (int priority = 0; priority <= maxPriority; ++priority) {
-			*s +=
-			    format("%-15s %-8u ", (active + 'P' + std::to_string(priority)).c_str(), ioLock->numRunners(priority));
+			*s += format("IOActiveP%-6d %-8u ", priority, ioLock->getRunnersCount(priority));
 		}
 		*s += "\n";
+		*s += format("%-15s %-8u ", "IOWaitingTotal", ioLock->getWaitersCount());
 		for (int priority = 0; priority <= maxPriority; ++priority) {
-			*s +=
-			    format("%-15s %-8u ", (await + 'P' + std::to_string(priority)).c_str(), ioLock->numWaiters(priority));
+			*s += format("IOWaitingP%-5d %-8u ", priority, ioLock->getWaitersCount(priority));
 		}
 	}
 }
@@ -11407,57 +11401,3 @@ TEST_CASE(":/redwood/performance/histograms") {
 
 	return Void();
 }
-
-ACTOR Future<Void> waitLockIncrement(PriorityMultiLock* pml, int priority, int* pout) {
-	state PriorityMultiLock::Lock lock = wait(pml->lock(priority));
-	wait(delay(deterministicRandom()->random01() * .1));
-	++*pout;
-	return Void();
-}
-
-TEST_CASE("/redwood/PriorityMultiLock") {
-	state std::vector<int> priorities = { 10, 20, 40 };
-	state int concurrency = 25;
-	state PriorityMultiLock* pml = new PriorityMultiLock(concurrency, priorities);
-	state std::vector<int> counts;
-	counts.resize(priorities.size(), 0);
-
-	// Clog the lock buy taking concurrency locks at each level
-	state std::vector<Future<PriorityMultiLock::Lock>> lockFutures;
-	for (int i = 0; i < priorities.size(); ++i) {
-		for (int j = 0; j < concurrency; ++j) {
-			lockFutures.push_back(pml->lock(i));
-		}
-	}
-
-	// Wait for n = concurrency locks to be acquired
-	wait(quorum(lockFutures, concurrency));
-
-	state std::vector<Future<Void>> futures;
-	for (int i = 0; i < 10e3; ++i) {
-		int p = i % priorities.size();
-		futures.push_back(waitLockIncrement(pml, p, &counts[p]));
-	}
-
-	state Future<Void> f = waitForAll(futures);
-
-	// Release the locks
-	lockFutures.clear();
-
-	// Print stats and wait for all futures to be ready
-	loop {
-		choose {
-			when(wait(delay(1))) {
-				printf("counts: ");
-				for (auto c : counts) {
-					printf("%d ", c);
-				}
-				printf(" pml: %s\n", pml->toString().c_str());
-			}
-			when(wait(f)) { break; }
-		}
-	}
-
-	delete pml;
-	return Void();
-}
@@ -162,10 +162,7 @@ ACTOR Future<Void> loadManifest(Database db, Reference<BlobConnectionProvider> b
 ACTOR Future<Void> printRestoreSummary(Database db, Reference<BlobConnectionProvider> blobConn);
 ACTOR Future<BlobGranuleRestoreVersionVector> listBlobGranules(Database db, Reference<BlobConnectionProvider> blobConn);
 ACTOR Future<int64_t> lastBlobEpoc(Database db, Reference<BlobConnectionProvider> blobConn);
-
-inline bool isFullRestoreMode() {
-	return SERVER_KNOBS->BLOB_FULL_RESTORE_MODE;
-};
+ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef range);
 
 #include "flow/unactorcompiler.h"
@@ -30,6 +30,7 @@
 struct BlobMigratorInterface {
 	constexpr static FileIdentifier file_identifier = 869199;
 	RequestStream<struct HaltBlobMigratorRequest> haltBlobMigrator;
+	RequestStream<ReplyPromise<Void>> waitFailure;
 	LocalityData locality;
 	UID uniqueID;
 	StorageServerInterface ssi;

@@ -48,7 +49,7 @@ struct BlobMigratorInterface {
 
 	template <class Archive>
 	void serialize(Archive& ar) {
-		serializer(ar, locality, uniqueID, haltBlobMigrator);
+		serializer(ar, locality, uniqueID, haltBlobMigrator, waitFailure);
 	}
 };
@@ -144,6 +144,7 @@
 	Future<Void> clientCounter;
 	int clientCount;
 	AsyncVar<bool> blobGranulesEnabled;
+	AsyncVar<bool> blobRestoreEnabled;
 	ClusterType clusterType = ClusterType::STANDALONE;
 	Optional<ClusterName> metaclusterName;
 	Optional<MetaclusterRegistrationEntry> metaclusterRegistration;

@@ -159,7 +160,7 @@
 	                                          TaskPriority::DefaultEndpoint,
 	                                          LockAware::True)), // SOMEDAY: Locality!
 	    unfinishedRecoveries(0), logGenerations(0), cachePopulated(false), clientCount(0),
-	    blobGranulesEnabled(config.blobGranulesEnabled) {
+	    blobGranulesEnabled(config.blobGranulesEnabled), blobRestoreEnabled(false) {
 		clientCounter = countClients(this);
 	}
@@ -60,6 +60,7 @@ class GrvProxyTagThrottler {
 		void setRate(double rate);
 		bool isMaxThrottled(double maxThrottleDuration) const;
 		void rejectRequests(LatencyBandsMap&);
+		void endReleaseWindow(int64_t numStarted, double elapsed);
 	};
 
 	// Track the budgets for each tag
@@ -55,7 +55,7 @@ public:
 
 	// Updates the budget to accumulate any extra capacity available or remove any excess that was used.
 	// Call at the end of a release window.
-	void endReleaseWindow(int64_t numStartedAtPriority, bool queueEmptyAtPriority, double elapsed);
+	void endReleaseWindow(int64_t numStarted, bool queueEmpty, double elapsed);
 
 	// Smoothly sets rate. If currently disabled, reenable
 	void setRate(double rate);
@@ -35,8 +35,9 @@ typedef Map<KeyRef, Reference<TCTenantInfo>> TenantMapByPrefix;
 struct Storage {
 	int64_t quota = std::numeric_limits<int64_t>::max();
 	int64_t usage = 0;
+	std::unordered_set<TenantName> tenants;
 };
-typedef std::unordered_map<TenantName, Storage> TenantStorageMap;
+typedef std::unordered_map<TenantGroupName, Storage> TenantStorageMap;
 
 struct TenantCacheTenantCreated {
 	KeyRange keys;

@@ -56,7 +57,8 @@ private:
 	uint64_t generation;
 	TenantMapByPrefix tenantCache;
 
-	// Map from tenant names to storage quota and usage
+	// Map from tenant group names to the list of tenants, cumulative storage used by
+	// all the tenants in the group, and its storage quota.
 	TenantStorageMap tenantStorageMap;
 
 	// mark the start of a new sweep of the tenant cache
@@ -435,6 +435,7 @@ struct StorageServerDisk {
 	// The following are pointers to the Counters in StorageServer::counters of the same names.
 	Counter* kvCommitLogicalBytes;
 	Counter* kvClearRanges;
+	Counter* kvClearSingleKey;
 	Counter* kvGets;
 	Counter* kvScans;
 	Counter* kvCommits;
@@ -1109,15 +1110,13 @@ public:
 
 	FlowLock serveFetchCheckpointParallelismLock;
 
-	PriorityMultiLock ssLock;
+	Reference<PriorityMultiLock> ssLock;
 	std::vector<int> readPriorityRanks;
 
 	Future<PriorityMultiLock::Lock> getReadLock(const Optional<ReadOptions>& options) {
-		// TODO: Fix perf regression in 100% cache read case where taking this lock adds too much overhead
-		return PriorityMultiLock::Lock();
-		// int readType = (int)(options.present() ? options.get().type : ReadType::NORMAL);
-		// readType = std::clamp<int>(readType, 0, readPriorityRanks.size() - 1);
-		// return ssLock.lock(readPriorityRanks[readType]);
+		int readType = (int)(options.present() ? options.get().type : ReadType::NORMAL);
+		readType = std::clamp<int>(readType, 0, readPriorityRanks.size() - 1);
+		return ssLock->lock(readPriorityRanks[readType]);
 	}
 
 	FlowLock serveAuditStorageParallelismLock;
@@ -1172,6 +1171,8 @@ public:
 	Counter kvCommitLogicalBytes;
 	// Count of all clearRange operatons to the storage engine.
 	Counter kvClearRanges;
+	// Count of all clearRange operations on a singlekeyRange(key delete) to the storage engine.
+	Counter kvClearSingleKey;
 	// ClearRange operations issued by FDB, instead of from users, e.g., ClearRange operations to remove a shard
 	// from a storage server, as in removeDataRange().
 	Counter kvSystemClearRanges;
@@ -1247,8 +1248,8 @@ public:
 	    feedVersionQueries("FeedVersionQueries", cc), bytesInput("BytesInput", cc),
 	    logicalBytesInput("LogicalBytesInput", cc), logicalBytesMoveInOverhead("LogicalBytesMoveInOverhead", cc),
 	    kvCommitLogicalBytes("KVCommitLogicalBytes", cc), kvClearRanges("KVClearRanges", cc),
-	    kvSystemClearRanges("KVSystemClearRanges", cc), bytesDurable("BytesDurable", cc),
-	    bytesFetched("BytesFetched", cc), mutationBytes("MutationBytes", cc),
+	    kvClearSingleKey("KVClearSingleKey", cc), kvSystemClearRanges("KVSystemClearRanges", cc),
+	    bytesDurable("BytesDurable", cc), bytesFetched("BytesFetched", cc), mutationBytes("MutationBytes", cc),
 	    feedBytesFetched("FeedBytesFetched", cc), sampledBytesCleared("SampledBytesCleared", cc),
 	    kvFetched("KVFetched", cc), mutations("Mutations", cc), setMutations("SetMutations", cc),
 	    clearRangeMutations("ClearRangeMutations", cc), atomicMutations("AtomicMutations", cc),
@@ -1404,7 +1405,8 @@ public:
 	    fetchKeysParallelismFullLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_FULL),
 	    fetchKeysBytesBudget(SERVER_KNOBS->STORAGE_FETCH_BYTES), fetchKeysBudgetUsed(false),
 	    serveFetchCheckpointParallelismLock(SERVER_KNOBS->SERVE_FETCH_CHECKPOINT_PARALLELISM),
-	    ssLock(SERVER_KNOBS->STORAGE_SERVER_READ_CONCURRENCY, SERVER_KNOBS->STORAGESERVER_READ_PRIORITIES),
+	    ssLock(makeReference<PriorityMultiLock>(SERVER_KNOBS->STORAGE_SERVER_READ_CONCURRENCY,
+	                                            SERVER_KNOBS->STORAGESERVER_READ_PRIORITIES)),
 	    serveAuditStorageParallelismLock(SERVER_KNOBS->SERVE_AUDIT_STORAGE_PARALLELISM),
 	    instanceID(deterministicRandom()->randomUniqueID().first()), shuttingDown(false), behind(false),
 	    versionBehind(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), lastBytesInputEBrake(0),

@@ -1412,7 +1414,7 @@ public:
 	    busiestWriteTagContext(ssi.id()), counters(this),
 	    storageServerSourceTLogIDEventHolder(
 	        makeReference<EventCacheHolder>(ssi.id().toString() + "/StorageServerSourceTLogID")) {
-		readPriorityRanks = parseStringToVector<int>(SERVER_KNOBS->STORAGESERVER_READ_RANKS, ',');
+		readPriorityRanks = parseStringToVector<int>(SERVER_KNOBS->STORAGESERVER_READTYPE_PRIORITY_MAP, ',');
 		ASSERT(readPriorityRanks.size() > (int)ReadType::MAX);
 		version.initMetric("StorageServer.Version"_sr, counters.cc.getId());
 		oldestVersion.initMetric("StorageServer.OldestVersion"_sr, counters.cc.getId());
@@ -1431,6 +1433,7 @@ public:
 
 		this->storage.kvCommitLogicalBytes = &counters.kvCommitLogicalBytes;
 		this->storage.kvClearRanges = &counters.kvClearRanges;
+		this->storage.kvClearSingleKey = &counters.kvClearSingleKey;
 		this->storage.kvGets = &counters.kvGets;
 		this->storage.kvScans = &counters.kvScans;
 		this->storage.kvCommits = &counters.kvCommits;
@@ -4762,7 +4765,6 @@ ACTOR Future<Void> mapSubquery(StorageServer* data,
                               Arena* pArena,
                               int matchIndex,
                               bool isRangeQuery,
-                              bool isBoundary,
                               KeyValueRef* it,
                               MappedKeyValueRef* kvm,
                               Key mappedKey) {

@@ -4770,31 +4772,42 @@ ACTOR Future<Void> mapSubquery(StorageServer* data,
 		// Use the mappedKey as the prefix of the range query.
 		GetRangeReqAndResultRef getRange = wait(quickGetKeyValues(data, mappedKey, version, pArena, pOriginalReq));
 		if ((!getRange.result.empty() && matchIndex == MATCH_INDEX_MATCHED_ONLY) ||
-		    (getRange.result.empty() && matchIndex == MATCH_INDEX_UNMATCHED_ONLY)) {
+		    (getRange.result.empty() && matchIndex == MATCH_INDEX_UNMATCHED_ONLY) || matchIndex == MATCH_INDEX_ALL) {
 			kvm->key = it->key;
 			kvm->value = it->value;
 		}
-
-		kvm->boundaryAndExist = isBoundary && !getRange.result.empty();
 		kvm->reqAndResult = getRange;
 	} else {
 		GetValueReqAndResultRef getValue = wait(quickGetValue(data, mappedKey, version, pArena, pOriginalReq));
 		kvm->reqAndResult = getValue;
-		kvm->boundaryAndExist = isBoundary && getValue.result.present();
 	}
 	return Void();
 }
 
+int getMappedKeyValueSize(MappedKeyValueRef mappedKeyValue) {
+	auto& reqAndResult = mappedKeyValue.reqAndResult;
+	int bytes = 0;
+	if (std::holds_alternative<GetValueReqAndResultRef>(reqAndResult)) {
+		const auto& getValue = std::get<GetValueReqAndResultRef>(reqAndResult);
+		bytes = getValue.expectedSize();
+	} else if (std::holds_alternative<GetRangeReqAndResultRef>(reqAndResult)) {
+		const auto& getRange = std::get<GetRangeReqAndResultRef>(reqAndResult);
+		bytes = getRange.result.expectedSize();
+	} else {
+		throw internal_error();
+	}
+	return bytes;
+}
+
 ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
                                                    GetKeyValuesReply input,
                                                    StringRef mapper,
                                                    // To provide span context, tags, debug ID to underlying lookups.
                                                    GetMappedKeyValuesRequest* pOriginalReq,
-                                                   Optional<Key> tenantPrefix,
-                                                   int matchIndex) {
+                                                   int matchIndex,
+                                                   int* remainingLimitBytes) {
 	state GetMappedKeyValuesReply result;
 	result.version = input.version;
-	result.more = input.more;
 	result.cached = input.cached;
 	result.arena.dependsOn(input.arena);
 
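A minimal sketch with hypothetical stand-in types showing the std::variant dispatch pattern that the new getMappedKeyValueSize() helper above relies on to charge a reply's byte budget:

#include <stdexcept>
#include <string>
#include <variant>
#include <vector>

struct ValueResultSketch {
	std::string value;
	int expectedSize() const { return (int)value.size(); }
};

struct RangeResultSketch {
	std::vector<std::string> rows;
	int expectedSize() const {
		int bytes = 0;
		for (const auto& r : rows)
			bytes += (int)r.size();
		return bytes;
	}
};

// Dispatch on whichever alternative the variant currently holds.
int resultBytes(const std::variant<ValueResultSketch, RangeResultSketch>& reqAndResult) {
	if (std::holds_alternative<ValueResultSketch>(reqAndResult)) {
		return std::get<ValueResultSketch>(reqAndResult).expectedSize();
	} else if (std::holds_alternative<RangeResultSketch>(reqAndResult)) {
		return std::get<RangeResultSketch>(reqAndResult).expectedSize();
	}
	throw std::logic_error("unexpected alternative");
}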
@@ -4823,22 +4836,15 @@ ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
 		g_traceBatch.addEvent("TransactionDebug",
 		                      pOriginalReq->options.get().debugID.get().first(),
 		                      "storageserver.mapKeyValues.BeforeLoop");
-	for (; offset < sz; offset += SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE) {
+	for (; offset < sz && *remainingLimitBytes > 0; offset += SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE) {
 		// Divide into batches of MAX_PARALLEL_QUICK_GET_VALUE subqueries
 		for (int i = 0; i + offset < sz && i < SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE; i++) {
 			KeyValueRef* it = &input.data[i + offset];
 			MappedKeyValueRef* kvm = &kvms[i];
-			bool isBoundary = (i + offset) == 0 || (i + offset) == sz - 1;
-			// need to keep the boundary, so that caller can use it as a continuation.
-			if (isBoundary || matchIndex == MATCH_INDEX_ALL) {
-				kvm->key = it->key;
-				kvm->value = it->value;
-			} else {
-				// Clear key value to the default.
-				kvm->key = ""_sr;
-				kvm->value = ""_sr;
-			}
+			// Clear key value to the default.
+			kvm->key = ""_sr;
+			kvm->value = ""_sr;
 
 			Key mappedKey = constructMappedKey(it, vt, mappedKeyFormatTuple);
 			// Make sure the mappedKey is always available, so that it's good even we want to get key asynchronously.
 			result.arena.dependsOn(mappedKey.arena());

@@ -4846,16 +4852,8 @@ ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
 			// std::cout << "key:" << printable(kvm->key) << ", value:" << printable(kvm->value)
 			// << ", mappedKey:" << printable(mappedKey) << std::endl;
 
-			subqueries.push_back(mapSubquery(data,
-			                                 input.version,
-			                                 pOriginalReq,
-			                                 &result.arena,
-			                                 matchIndex,
-			                                 isRangeQuery,
-			                                 isBoundary,
-			                                 it,
-			                                 kvm,
-			                                 mappedKey));
+			subqueries.push_back(mapSubquery(
+			    data, input.version, pOriginalReq, &result.arena, matchIndex, isRangeQuery, it, kvm, mappedKey));
 		}
 		wait(waitForAll(subqueries));
 		if (pOriginalReq->options.present() && pOriginalReq->options.get().debugID.present())
@@ -4864,9 +4862,31 @@ ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
 			                      "storageserver.mapKeyValues.AfterBatch");
 		subqueries.clear();
 		for (int i = 0; i + offset < sz && i < SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE; i++) {
+			// since we always read the index, so always consider the index size
+			int indexSize = sizeof(KeyValueRef) + input.data[i + offset].expectedSize();
+			int size = indexSize + getMappedKeyValueSize(kvms[i]);
+			*remainingLimitBytes -= size;
 			result.data.push_back(result.arena, kvms[i]);
+			if (SERVER_KNOBS->STRICTLY_ENFORCE_BYTE_LIMIT && *remainingLimitBytes <= 0) {
+				break;
+			}
 		}
 	}
 
+	int resultSize = result.data.size();
+	if (resultSize > 0) {
+		// keep index for boundary index entries, so that caller can use it as a continuation.
+		result.data[0].key = input.data[0].key;
+		result.data[0].value = input.data[0].value;
+		result.data[0].boundaryAndExist = getMappedKeyValueSize(kvms[0]) > 0;
+
+		result.data.back().key = input.data[resultSize - 1].key;
+		result.data.back().value = input.data[resultSize - 1].value;
+		// index needs to be -1
+		int index = (resultSize - 1) % SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE;
+		result.data.back().boundaryAndExist = getMappedKeyValueSize(kvms[index]) > 0;
+	}
+	result.more = input.more || resultSize < sz;
 	if (pOriginalReq->options.present() && pOriginalReq->options.get().debugID.present())
 		g_traceBatch.addEvent("TransactionDebug",
 		                      pOriginalReq->options.get().debugID.get().first(),
@@ -5121,12 +5141,15 @@ ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRe
 			req.reply.send(none);
 		} else {
 			state int remainingLimitBytes = req.limitBytes;
+			// create a temporary byte limit for index fetching ONLY, this should be excessive
+			// because readRange is cheap when reading additional bytes
+			state int bytesForIndex =
+			    std::min(req.limitBytes, (int)(req.limitBytes * SERVER_KNOBS->FRACTION_INDEX_BYTELIMIT_PREFETCH));
 			GetKeyValuesReply getKeyValuesReply = wait(readRange(data,
 			                                                     version,
 			                                                     KeyRangeRef(begin, end),
 			                                                     req.limit,
-			                                                     &remainingLimitBytes,
+			                                                     &bytesForIndex,
 			                                                     span.context,
 			                                                     req.options,
 			                                                     tenantPrefix));

@@ -5140,9 +5163,10 @@ ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRe
 			try {
 				// Map the scanned range to another list of keys and look up.
 				GetMappedKeyValuesReply _r =
-				    wait(mapKeyValues(data, getKeyValuesReply, req.mapper, &req, tenantPrefix, req.matchIndex));
+				    wait(mapKeyValues(data, getKeyValuesReply, req.mapper, &req, req.matchIndex, &remainingLimitBytes));
 				r = _r;
 			} catch (Error& e) {
+				// catch txn_too_old here if prefetch runs for too long, and returns it back to client
 				TraceEvent("MapError").error(e);
 				throw;
 			}
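Sketch under assumed knob semantics (the fraction value is illustrative, not the real FRACTION_INDEX_BYTELIMIT_PREFETCH default): how a request's byte budget can be split between the index prefetch and the reply accounting, mirroring the bytesForIndex / remainingLimitBytes split in the hunk above.

#include <algorithm>

// Hypothetical budget split for a mapped-range request.
struct MappedRangeBudget {
	int indexBytes; // byte limit handed to the index readRange()
	int replyBytes; // byte limit charged while assembling the mapped reply
};

// limitBytes: the client's request limit; indexFraction: assumed knob-like ratio.
MappedRangeBudget splitBudget(int limitBytes, double indexFraction) {
	MappedRangeBudget b;
	// Cap the index prefetch at the overall limit, the same min() pattern used above.
	b.indexBytes = std::min(limitBytes, (int)(limitBytes * indexFraction));
	b.replyBytes = limitBytes;
	return b;
}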
@@ -6138,6 +6162,7 @@ ACTOR Future<Standalone<VectorRef<BlobGranuleChunkRef>>> tryReadBlobGranules(Tra
 	loop {
 		try {
 			Standalone<VectorRef<BlobGranuleChunkRef>> chunks = wait(tr->readBlobGranules(keys, 0, readVersion));
+			TraceEvent(SevDebug, "ReadBlobGranules").detail("Keys", keys).detail("Chunks", chunks.size());
 			return chunks;
 		} catch (Error& e) {
 			if (retryCount >= maxRetryCount) {

@@ -6169,10 +6194,7 @@ ACTOR Future<Void> tryGetRangeFromBlob(PromiseStream<RangeResult> results,
 		for (i = 0; i < chunks.size(); ++i) {
 			state KeyRangeRef chunkRange = chunks[i].keyRange;
 			state RangeResult rows = wait(readBlobGranule(chunks[i], keys, 0, fetchVersion, blobConn));
-			TraceEvent("ReadBlobData")
-			    .detail("Rows", rows.size())
-			    .detail("ChunkRange", chunkRange.toString())
-			    .detail("Keys", keys.toString());
+			TraceEvent(SevDebug, "ReadBlobData").detail("Rows", rows.size()).detail("ChunkRange", chunkRange);
 			if (rows.size() == 0) {
 				rows.readThrough = KeyRef(rows.arena(), std::min(chunkRange.end, keys.end));
 			}

@@ -6185,7 +6207,7 @@ ACTOR Future<Void> tryGetRangeFromBlob(PromiseStream<RangeResult> results,
 	} catch (Error& e) {
 		TraceEvent(SevWarn, "ReadBlobDataFailure")
 		    .suppressFor(5.0)
-		    .detail("Keys", keys.toString())
+		    .detail("Keys", keys)
 		    .detail("FetchVersion", fetchVersion)
 		    .detail("Error", e.what());
 		tr->reset();
@@ -6994,7 +7016,8 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 	// We must also ensure we have fetched all change feed metadata BEFORE changing the phase to fetching to ensure
 	// change feed mutations get applied correctly
 	state std::vector<Key> changeFeedsToFetch;
-	if (!isFullRestoreMode()) {
+	state bool isFullRestore = wait(isFullRestoreMode(data->cx, keys));
+	if (!isFullRestore) {
 		std::vector<Key> _cfToFetch = wait(fetchCFMetadata);
 		changeFeedsToFetch = _cfToFetch;
 	}

@@ -7072,7 +7095,7 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 
 			state PromiseStream<RangeResult> results;
 			state Future<Void> hold;
-			if (SERVER_KNOBS->FETCH_USING_BLOB) {
+			if (isFullRestore) {
 				hold = tryGetRangeFromBlob(results, &tr, keys, fetchVersion, data->blobConn);
 			} else {
 				hold = tryGetRange(results, &tr, keys);

@@ -7110,7 +7133,6 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 					                                   data->thisServerID);
 				}
 			}
-
 			metricReporter.addFetchedBytes(expectedBlockSize, this_block.size());
 
 			// Write this_block to storage
@@ -9703,6 +9725,9 @@ void setAssignedStatus(StorageServer* self, KeyRangeRef keys, bool nowAssigned)
 void StorageServerDisk::clearRange(KeyRangeRef keys) {
 	storage->clear(keys, &data->metrics);
 	++(*kvClearRanges);
+	if (keys.singleKeyRange()) {
+		++(*kvClearSingleKey);
+	}
 }
 
 void StorageServerDisk::writeKeyValue(KeyValueRef kv) {

@@ -9717,6 +9742,9 @@ void StorageServerDisk::writeMutation(MutationRef mutation) {
 	} else if (mutation.type == MutationRef::ClearRange) {
 		storage->clear(KeyRangeRef(mutation.param1, mutation.param2), &data->metrics);
 		++(*kvClearRanges);
+		if (KeyRangeRef(mutation.param1, mutation.param2).singleKeyRange()) {
+			++(*kvClearSingleKey);
+		}
 	} else
 		ASSERT(false);
 }

@@ -9732,6 +9760,9 @@ void StorageServerDisk::writeMutations(const VectorRef<MutationRef>& mutations,
 		} else if (m.type == MutationRef::ClearRange) {
 			storage->clear(KeyRangeRef(m.param1, m.param2), &data->metrics);
 			++(*kvClearRanges);
+			if (KeyRangeRef(m.param1, m.param2).singleKeyRange()) {
+				++(*kvClearSingleKey);
+			}
 		}
 	}
 }
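Hypothetical helper, not from the diff, mirroring the counting added above: in FDB's key ordering a clear of [k, keyAfter(k)) covers exactly one key, which is what a singleKeyRange() check detects.

#include <string>

// Sketch: a range is a single-key clear when end == begin + '\x00' (i.e. keyAfter(begin)).
bool isSingleKeyClear(const std::string& begin, const std::string& end) {
	return end.size() == begin.size() + 1 && end.back() == '\0' &&
	       end.compare(0, begin.size(), begin) == 0;
}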
@@ -10399,20 +10430,20 @@ ACTOR Future<Void> metricsCore(StorageServer* self, StorageServerInterface ssi)
 			te.detail("StorageEngine", self->storage.getKeyValueStoreType().toString());
 			te.detail("Tag", self->tag.toString());
 			std::vector<int> rpr = self->readPriorityRanks;
-			te.detail("ReadsActive", self->ssLock.totalRunners());
-			te.detail("ReadsWaiting", self->ssLock.totalWaiters());
+			te.detail("ReadsTotalActive", self->ssLock->getRunnersCount());
+			te.detail("ReadsTotalWaiting", self->ssLock->getWaitersCount());
 			int type = (int)ReadType::FETCH;
-			te.detail("ReadFetchActive", self->ssLock.numRunners(rpr[type]));
-			te.detail("ReadFetchWaiting", self->ssLock.numWaiters(rpr[type]));
+			te.detail("ReadFetchActive", self->ssLock->getRunnersCount(rpr[type]));
+			te.detail("ReadFetchWaiting", self->ssLock->getWaitersCount(rpr[type]));
 			type = (int)ReadType::LOW;
-			te.detail("ReadLowActive", self->ssLock.numRunners(rpr[type]));
-			te.detail("ReadLowWaiting", self->ssLock.numWaiters(rpr[type]));
+			te.detail("ReadLowActive", self->ssLock->getRunnersCount(rpr[type]));
+			te.detail("ReadLowWaiting", self->ssLock->getWaitersCount(rpr[type]));
 			type = (int)ReadType::NORMAL;
-			te.detail("ReadNormalActive", self->ssLock.numRunners(rpr[type]));
-			te.detail("ReadNormalWaiting", self->ssLock.numWaiters(rpr[type]));
+			te.detail("ReadNormalActive", self->ssLock->getRunnersCount(rpr[type]));
+			te.detail("ReadNormalWaiting", self->ssLock->getWaitersCount(rpr[type]));
 			type = (int)ReadType::HIGH;
-			te.detail("ReadHighActive", self->ssLock.numRunners(rpr[type]));
-			te.detail("ReadHighWaiting", self->ssLock.numWaiters(rpr[type]));
+			te.detail("ReadHighActive", self->ssLock->getRunnersCount(rpr[type]));
+			te.detail("ReadHighWaiting", self->ssLock->getWaitersCount(rpr[type]));
 			StorageBytes sb = self->storage.getStorageBytes();
 			te.detail("KvstoreBytesUsed", sb.used);
 			te.detail("KvstoreBytesFree", sb.free);
@@ -11228,7 +11259,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
 		// If the storage server dies while something that uses self is still on the stack,
 		// we want that actor to complete before we terminate and that memory goes out of scope
 
-		self.ssLock.kill();
+		self.ssLock->kill();
 
 		state Error err = e;
 		if (storageServerTerminated(self, persistentData, err)) {

@@ -11326,7 +11357,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
 		throw internal_error();
 	} catch (Error& e) {
 
-		self.ssLock.kill();
+		self.ssLock->kill();
 
 		if (self.byteSampleRecovery.isValid()) {
 			self.byteSampleRecovery.cancel();
@@ -2335,6 +2335,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
 				} else {
 					startRole(Role::BLOB_MIGRATOR, recruited.id(), interf.id());
 					DUMPTOKEN(recruited.haltBlobMigrator);
+					DUMPTOKEN(recruited.waitFailure);
 					DUMPTOKEN(recruited.ssi.getValue);
 					DUMPTOKEN(recruited.ssi.getKey);
 					DUMPTOKEN(recruited.ssi.getKeyValues);

@@ -2345,7 +2346,6 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
 					DUMPTOKEN(recruited.ssi.getReadHotRanges);
 					DUMPTOKEN(recruited.ssi.getRangeSplitPoints);
 					DUMPTOKEN(recruited.ssi.getStorageMetrics);
-					DUMPTOKEN(recruited.ssi.waitFailure);
 					DUMPTOKEN(recruited.ssi.getQueuingMetrics);
 					DUMPTOKEN(recruited.ssi.getKeyValueStoreType);
 					DUMPTOKEN(recruited.ssi.watchValue);
@@ -20,7 +20,9 @@
 
 #include <cstdint>
 
+#include "fdbclient/Tenant.h"
 #include "fdbclient/TenantManagement.actor.h"
+#include "fdbserver/Knobs.h"
 #include "fdbserver/workloads/workloads.actor.h"
 
 #include "flow/actorcompiler.h" // This must be the last #include.

@@ -28,9 +30,13 @@
 struct CreateTenantWorkload : TestWorkload {
 	static constexpr auto NAME = "CreateTenant";
 	TenantName tenant;
+	Optional<TenantGroupName> tenantGroup;
 
 	CreateTenantWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
 		tenant = getOption(options, "name"_sr, "DefaultTenant"_sr);
+		if (hasOption(options, "group"_sr)) {
+			tenantGroup = getOption(options, "group"_sr, "DefaultGroup"_sr);
+		}
 	}
 
 	Future<Void> setup(Database const& cx) override {

@@ -46,7 +52,12 @@ struct CreateTenantWorkload : TestWorkload {
 
 	ACTOR static Future<Void> _setup(CreateTenantWorkload* self, Database db) {
 		try {
-			Optional<TenantMapEntry> entry = wait(TenantAPI::createTenant(db.getReference(), self->tenant));
+			TenantMapEntry givenEntry;
+			if (self->tenantGroup.present()) {
+				givenEntry.tenantGroup = self->tenantGroup.get();
+				givenEntry.encrypted = SERVER_KNOBS->ENABLE_ENCRYPTION;
+			}
+			Optional<TenantMapEntry> entry = wait(TenantAPI::createTenant(db.getReference(), self->tenant, givenEntry));
 			ASSERT(entry.present());
 		} catch (Error& e) {
 			TraceEvent(SevError, "TenantCreationFailed").error(e);
@ -38,6 +38,8 @@ const KeyRef prefix = "prefix"_sr;
|
||||||
const KeyRef RECORD = "RECORD"_sr;
|
const KeyRef RECORD = "RECORD"_sr;
|
||||||
const KeyRef INDEX = "INDEX"_sr;
|
const KeyRef INDEX = "INDEX"_sr;
|
||||||
|
|
||||||
|
int recordSize;
|
||||||
|
int indexSize;
|
||||||
struct GetMappedRangeWorkload : ApiWorkload {
|
struct GetMappedRangeWorkload : ApiWorkload {
|
||||||
static constexpr auto NAME = "GetMappedRange";
|
static constexpr auto NAME = "GetMappedRange";
|
||||||
bool enabled;
|
bool enabled;
|
||||||
|
@ -93,19 +95,32 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
||||||
loop {
|
loop {
|
||||||
std::cout << "start fillInRecords n=" << n << std::endl;
|
std::cout << "start fillInRecords n=" << n << std::endl;
|
||||||
// TODO: When n is large, split into multiple transactions.
|
// TODO: When n is large, split into multiple transactions.
|
||||||
|
recordSize = 0;
|
||||||
|
indexSize = 0;
|
||||||
try {
|
try {
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
if (self->SPLIT_RECORDS) {
|
if (self->SPLIT_RECORDS) {
|
||||||
for (int split = 0; split < SPLIT_SIZE; split++) {
|
for (int split = 0; split < SPLIT_SIZE; split++) {
|
||||||
tr.set(recordKey(i, split), recordValue(i, split));
|
tr.set(recordKey(i, split), recordValue(i, split));
|
||||||
|
if (i == 0) {
|
||||||
|
recordSize +=
|
||||||
|
recordKey(i, split).size() + recordValue(i, split).size() + sizeof(KeyValueRef);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tr.set(recordKey(i), recordValue(i));
|
tr.set(recordKey(i), recordValue(i));
|
||||||
|
if (i == 0) {
|
||||||
|
recordSize += recordKey(i).size() + recordValue(i).size() + sizeof(KeyValueRef);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
tr.set(indexEntryKey(i), EMPTY);
|
tr.set(indexEntryKey(i), EMPTY);
|
||||||
|
if (i == 0) {
|
||||||
|
indexSize += indexEntryKey(i).size() + sizeof(KeyValueRef);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
wait(tr.commit());
|
wait(tr.commit());
|
||||||
std::cout << "finished fillInRecords with version " << tr.getCommittedVersion() << std::endl;
|
std::cout << "finished fillInRecords with version " << tr.getCommittedVersion() << " recordSize "
|
||||||
|
<< recordSize << " indexSize " << indexSize << std::endl;
|
||||||
break;
|
break;
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
std::cout << "failed fillInRecords, retry" << std::endl;
|
std::cout << "failed fillInRecords, retry" << std::endl;
|
||||||
@@ -146,8 +161,9 @@ struct GetMappedRangeWorkload : ApiWorkload {
 	                    int matchIndex,
 	                    bool isBoundary,
 	                    bool allMissing) {
-		// std::cout << "validateRecord expectedId " << expectedId << " it->key " << printable(it->key) << "
-		// indexEntryKey(expectedId) " << printable(indexEntryKey(expectedId)) << std::endl;
+		// std::cout << "validateRecord expectedId " << expectedId << " it->key " << printable(it->key)
+		// << " indexEntryKey(expectedId) " << printable(indexEntryKey(expectedId))
+		// << " matchIndex: " << matchIndex << std::endl;
 		if (matchIndex == MATCH_INDEX_ALL || isBoundary) {
 			ASSERT(it->key == indexEntryKey(expectedId));
 		} else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) {
@@ -163,7 +179,6 @@ struct GetMappedRangeWorkload : ApiWorkload {
 		ASSERT(std::holds_alternative<GetRangeReqAndResultRef>(it->reqAndResult));
 		auto& getRange = std::get<GetRangeReqAndResultRef>(it->reqAndResult);
 		auto& rangeResult = getRange.result;
-		ASSERT(it->boundaryAndExist == (isBoundary && !rangeResult.empty()));
 		// std::cout << "rangeResult.size()=" << rangeResult.size() << std::endl;
 		// In the future, we may be able to do the continuation more efficiently by combining partial results
 		// together and then validate.
@@ -200,6 +215,7 @@ struct GetMappedRangeWorkload : ApiWorkload {
 	                                                          KeySelector endSelector,
 	                                                          Key mapper,
 	                                                          int limit,
+	                                                          int byteLimit,
 	                                                          int expectedBeginId,
 	                                                          GetMappedRangeWorkload* self,
 	                                                          int matchIndex,
@@ -207,14 +223,16 @@ struct GetMappedRangeWorkload : ApiWorkload {
 
 		std::cout << "start scanMappedRangeWithLimits beginSelector:" << beginSelector.toString()
 		          << " endSelector:" << endSelector.toString() << " expectedBeginId:" << expectedBeginId
-		          << " limit:" << limit << std::endl;
+		          << " limit:" << limit << " byteLimit: " << byteLimit << " recordSize: " << recordSize
+		          << " STRICTLY_ENFORCE_BYTE_LIMIT: " << SERVER_KNOBS->STRICTLY_ENFORCE_BYTE_LIMIT << " allMissing "
+		          << allMissing << std::endl;
 		loop {
 			state Reference<TransactionWrapper> tr = self->createTransaction();
 			try {
 				MappedRangeResult result = wait(tr->getMappedRange(beginSelector,
 				                                                   endSelector,
 				                                                   mapper,
-				                                                   GetRangeLimits(limit),
+				                                                   GetRangeLimits(limit, byteLimit),
 				                                                   matchIndex,
 				                                                   self->snapshot,
 				                                                   Reverse::False));
@@ -270,17 +288,51 @@ struct GetMappedRangeWorkload : ApiWorkload {
 		Key endTuple = Tuple::makeTuple(prefix, INDEX, indexKey(endId)).getDataAsStandalone();
 		state KeySelector endSelector = KeySelector(firstGreaterOrEqual(endTuple));
 		state int limit = 100;
+		state int byteLimit = deterministicRandom()->randomInt(1, 9) * 10000;
 		state int expectedBeginId = beginId;
+		std::cout << "ByteLimit: " << byteLimit << " limit: " << limit
+		          << " FRACTION_INDEX_BYTELIMIT_PREFETCH: " << SERVER_KNOBS->FRACTION_INDEX_BYTELIMIT_PREFETCH
+		          << " MAX_PARALLEL_QUICK_GET_VALUE: " << SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE << std::endl;
 		while (true) {
-			MappedRangeResult result = wait(self->scanMappedRangeWithLimits(
-			    cx, beginSelector, endSelector, mapper, limit, expectedBeginId, self, matchIndex, allMissing));
+			MappedRangeResult result = wait(self->scanMappedRangeWithLimits(cx,
+			                                                                beginSelector,
+			                                                                endSelector,
+			                                                                mapper,
+			                                                                limit,
+			                                                                byteLimit,
+			                                                                expectedBeginId,
+			                                                                self,
+			                                                                matchIndex,
+			                                                                allMissing));
 			expectedBeginId += result.size();
 			if (result.more) {
 				if (result.empty()) {
 					// This is usually not expected.
 					std::cout << "not result but have more, try again" << std::endl;
 				} else {
-					// auto& reqAndResult = std::get<GetRangeReqAndResultRef>(result.back().reqAndResult);
+					int size = allMissing ? indexSize : (indexSize + recordSize);
+					int expectedCnt = limit;
+					int indexByteLimit = byteLimit * SERVER_KNOBS->FRACTION_INDEX_BYTELIMIT_PREFETCH;
+					int indexCountByteLimit = indexByteLimit / indexSize + (indexByteLimit % indexSize != 0);
+					int indexCount = std::min(limit, indexCountByteLimit);
+					std::cout << "indexCount: " << indexCount << std::endl;
+					// result set cannot be larger than the number of index fetched
+					ASSERT(result.size() <= indexCount);
+
+					expectedCnt = std::min(expectedCnt, indexCount);
+					int boundByRecord;
+					if (SERVER_KNOBS->STRICTLY_ENFORCE_BYTE_LIMIT) {
+						// might have 1 additional entry over the limit
+						boundByRecord = byteLimit / size + (byteLimit % size != 0);
+					} else {
+						// might have 1 additional batch over the limit
+						int roundSize = size * SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE;
+						int round = byteLimit / roundSize + (byteLimit % roundSize != 0);
+						boundByRecord = round * SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE;
+					}
+					expectedCnt = std::min(expectedCnt, boundByRecord);
+					std::cout << "boundByRecord: " << boundByRecord << std::endl;
+					ASSERT(result.size() == expectedCnt);
 					beginSelector = KeySelector(firstGreaterThan(result.back().key));
 				}
 			} else {
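The expectedCnt check above bounds the result size twice: first by how many index entries the prefetch byte budget (byteLimit * FRACTION_INDEX_BYTELIMIT_PREFETCH) can cover, then by how many full rows fit under byteLimit, rounded up per entry when STRICTLY_ENFORCE_BYTE_LIMIT is set and per batch of MAX_PARALLEL_QUICK_GET_VALUE otherwise. A self-contained restatement of that arithmetic with assumed knob values (the real values come from SERVER_KNOBS):

#include <algorithm>
#include <cstdio>

// Illustrative re-statement of the bound checked above; fractionIndexByteLimitPrefetch and
// maxParallelQuickGetValue are assumed stand-ins for the corresponding server knobs.
int expectedResultCount(int limit, int byteLimit, int indexSize, int recordSize, bool strictByteLimit) {
	const double fractionIndexByteLimitPrefetch = 0.2; // assumed
	const int maxParallelQuickGetValue = 8;            // assumed

	int size = indexSize + recordSize; // bytes contributed by one returned row
	int indexByteLimit = static_cast<int>(byteLimit * fractionIndexByteLimitPrefetch);
	int indexCount = indexByteLimit / indexSize + (indexByteLimit % indexSize != 0); // round up

	int expected = std::min(limit, indexCount);

	int boundByRecord;
	if (strictByteLimit) {
		boundByRecord = byteLimit / size + (byteLimit % size != 0); // at most one entry over
	} else {
		int roundSize = size * maxParallelQuickGetValue;
		int round = byteLimit / roundSize + (byteLimit % roundSize != 0); // at most one batch over
		boundByRecord = round * maxParallelQuickGetValue;
	}
	return std::min(expected, boundByRecord);
}

int main() {
	// Example: 100-row limit, 40000-byte limit, 40-byte index entries, 150-byte records.
	std::printf("strict: %d relaxed: %d\n",
	            expectedResultCount(100, 40000, 40, 150, true),
	            expectedResultCount(100, 40000, 40, 150, false));
}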
@@ -289,6 +341,7 @@ struct GetMappedRangeWorkload : ApiWorkload {
 			}
 		}
 		ASSERT(expectedBeginId == endId);
 
 		return Void();
 	}
 
@@ -433,6 +486,8 @@ struct GetMappedRangeWorkload : ApiWorkload {
 		} else if (r < 0.75) {
 			matchIndex = MATCH_INDEX_UNMATCHED_ONLY;
 		}
+		state bool originalStrictlyEnforeByteLimit = SERVER_KNOBS->STRICTLY_ENFORCE_BYTE_LIMIT;
+		(const_cast<ServerKnobs*>(SERVER_KNOBS))->STRICTLY_ENFORCE_BYTE_LIMIT = deterministicRandom()->coinflip();
 		wait(self->scanMappedRange(cx, 10, 490, mapper, self, matchIndex));
 
 		{
@@ -440,6 +495,8 @@ struct GetMappedRangeWorkload : ApiWorkload {
 			wait(self->scanMappedRange(cx, 10, 490, mapper, self, MATCH_INDEX_UNMATCHED_ONLY, true));
 		}
 
+		// reset it to default
+		(const_cast<ServerKnobs*>(SERVER_KNOBS))->STRICTLY_ENFORCE_BYTE_LIMIT = originalStrictlyEnforeByteLimit;
 		return Void();
 	}

@@ -68,7 +68,17 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
 	void getMetrics(std::vector<PerfMetric>& m) override {}
 	// disable the default timeout setting
 	double getCheckTimeout() const override { return std::numeric_limits<double>::max(); }
-	void disableFailureInjectionWorkloads(std::set<std::string>& out) const override { out.insert("RandomMoveKeys"); }
+	void disableFailureInjectionWorkloads(std::set<std::string>& out) const override {
+		out.insert("RandomMoveKeys");
+
+		// Rollback interferes with the
+		// \xff\xff/worker_interfaces test, since it can
+		// trigger a cluster recovery, causing the worker
+		// interface for a machine to be updated in the middle
+		// of the test.
+		out.insert("RollbackWorkload");
+	}
 
 	Future<Void> _setup(Database cx, SpecialKeySpaceCorrectnessWorkload* self) {
 		cx->specialKeySpace = std::make_unique<SpecialKeySpace>();

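For context, disableFailureInjectionWorkloads() lets a workload exclude injected fault workloads whose side effects would invalidate its checks. A minimal sketch of the same opt-out pattern; the struct and the second workload name below are hypothetical placeholders, not part of this commit:

#include <set>
#include <string>

// Hypothetical workload sketch; in FDB this would override TestWorkload::disableFailureInjectionWorkloads.
struct MyWorkloadSketch {
	void disableFailureInjectionWorkloads(std::set<std::string>& out) const {
		out.insert("RandomMoveKeys");    // data movement would race with the keys this test inspects
		out.insert("SomeOtherWorkload"); // placeholder for any other injected workload that conflicts
	}
};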
@@ -18,9 +18,10 @@
  * limitations under the License.
  */
 
-#include "fdbrpc/TenantName.h"
 #include "fdbclient/ManagementAPI.actor.h"
 #include "fdbclient/SystemData.h"
+#include "fdbclient/Tenant.h"
+#include "fdbclient/TenantManagement.actor.h"
 #include "fdbrpc/TenantName.h"
 #include "fdbserver/Knobs.h"
 #include "fdbserver/workloads/workloads.actor.h"
@@ -31,12 +32,16 @@
 
 struct StorageQuotaWorkload : TestWorkload {
 	static constexpr auto NAME = "StorageQuota";
+	TenantGroupName group;
 	TenantName tenant;
 	int nodeCount;
+	TenantName emptyTenant;
 
 	StorageQuotaWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
-		nodeCount = getOption(options, "nodeCount"_sr, 10000);
+		group = getOption(options, "group"_sr, "DefaultGroup"_sr);
 		tenant = getOption(options, "tenant"_sr, "DefaultTenant"_sr);
+		nodeCount = getOption(options, "nodeCount"_sr, 10000);
+		emptyTenant = getOption(options, "emptyTenant"_sr, "DefaultTenant"_sr);
 	}
 
 	Future<Void> setup(Database const& cx) override {
@@ -67,27 +72,42 @@ struct StorageQuotaWorkload : TestWorkload {
 	Standalone<KeyValueRef> operator()(int n) { return KeyValueRef(keyForIndex(n), value((n + 1) % nodeCount)); }
 
 	ACTOR Future<Void> _start(StorageQuotaWorkload* self, Database cx) {
-		// Check that the quota set/get functions work as expected.
-		// Set the quota to just below the current size.
+		state TenantMapEntry entry1 = wait(TenantAPI::getTenant(cx.getReference(), self->tenant));
+		state TenantMapEntry entry2 = wait(TenantAPI::getTenant(cx.getReference(), self->emptyTenant));
+		ASSERT(entry1.tenantGroup.present() && entry1.tenantGroup.get() == self->group &&
+		       entry2.tenantGroup.present() && entry2.tenantGroup.get() == self->group);
+
+		// Get the size of the non-empty tenant. We will set the quota of the tenant group
+		// to just below the current size of this tenant.
 		state int64_t size = wait(getSize(cx, self->tenant));
 		state int64_t quota = size - 1;
-		wait(setStorageQuotaHelper(cx, self->tenant, quota));
-		state Optional<int64_t> quotaRead = wait(getStorageQuotaHelper(cx, self->tenant));
+
+		// Check that the quota set/get functions work as expected.
+		wait(setStorageQuotaHelper(cx, self->group, quota));
+		state Optional<int64_t> quotaRead = wait(getStorageQuotaHelper(cx, self->group));
 		ASSERT(quotaRead.present() && quotaRead.get() == quota);
 
-		if (!SERVER_KNOBS->DD_TENANT_AWARENESS_ENABLED) {
+		if (!SERVER_KNOBS->STORAGE_QUOTA_ENABLED) {
 			return Void();
 		}
 
-		// Check that writes are rejected when the tenant is over quota.
-		state bool rejected = wait(tryWrite(self, cx, /*expectOk=*/false));
-		ASSERT(rejected);
+		// Check that writes to both the tenants are rejected when the group is over quota.
+		state bool rejected1 = wait(tryWrite(self, cx, self->tenant, /*expectOk=*/false));
+		ASSERT(rejected1);
+		state bool rejected2 = wait(tryWrite(self, cx, self->emptyTenant, /*expectOk=*/false));
+		ASSERT(rejected2);
 
-		// Increase the quota. Check that writes are now able to commit.
-		quota = size * 2;
-		wait(setStorageQuotaHelper(cx, self->tenant, quota));
-		state bool committed = wait(tryWrite(self, cx, /*expectOk=*/true));
-		ASSERT(committed);
+		// Increase the quota or clear the quota. Check that writes to both the tenants are now able to commit.
+		if (deterministicRandom()->coinflip()) {
+			quota = size * 2;
+			wait(setStorageQuotaHelper(cx, self->group, quota));
+		} else {
+			wait(clearStorageQuotaHelper(cx, self->group));
+		}
+		state bool committed1 = wait(tryWrite(self, cx, self->tenant, /*expectOk=*/true));
+		ASSERT(committed1);
+		state bool committed2 = wait(tryWrite(self, cx, self->emptyTenant, /*expectOk=*/true));
+		ASSERT(committed2);
 
 		return Void();
 	}
@@ -115,11 +135,11 @@ struct StorageQuotaWorkload : TestWorkload {
 		}
 	}
 
-	ACTOR static Future<Void> setStorageQuotaHelper(Database cx, TenantName tenantName, int64_t quota) {
+	ACTOR static Future<Void> setStorageQuotaHelper(Database cx, TenantGroupName tenantGroupName, int64_t quota) {
 		state Transaction tr(cx);
 		loop {
 			try {
-				setStorageQuota(tr, tenantName, quota);
+				setStorageQuota(tr, tenantGroupName, quota);
 				wait(tr.commit());
 				return Void();
 			} catch (Error& e) {
@@ -128,12 +148,24 @@ struct StorageQuotaWorkload : TestWorkload {
 		}
 	}
 
-	ACTOR static Future<Optional<int64_t>> getStorageQuotaHelper(Database cx, TenantName tenantName) {
+	ACTOR static Future<Void> clearStorageQuotaHelper(Database cx, TenantGroupName tenantGroupName) {
 		state Transaction tr(cx);
 		loop {
 			try {
-				state Optional<int64_t> quota = wait(getStorageQuota(&tr, tenantName));
+				clearStorageQuota(tr, tenantGroupName);
 				wait(tr.commit());
+				return Void();
+			} catch (Error& e) {
+				wait(tr.onError(e));
+			}
+		}
+	}
+
+	ACTOR static Future<Optional<int64_t>> getStorageQuotaHelper(Database cx, TenantGroupName tenantGroupName) {
+		state Transaction tr(cx);
+		loop {
+			try {
+				state Optional<int64_t> quota = wait(getStorageQuota(&tr, tenantGroupName));
 				return quota;
 			} catch (Error& e) {
 				wait(tr.onError(e));
@@ -141,13 +173,13 @@ struct StorageQuotaWorkload : TestWorkload {
 		}
 	}
 
-	ACTOR static Future<bool> tryWrite(StorageQuotaWorkload* self, Database cx, bool expectOk) {
+	ACTOR static Future<bool> tryWrite(StorageQuotaWorkload* self, Database cx, TenantName tenant, bool expectOk) {
 		state int i;
 		// Retry the transaction a few times if needed; this allows us wait for a while for all
 		// the storage usage and quota related monitors to fetch and propagate the latest information
 		// about the tenants that are over storage quota.
 		for (i = 0; i < 10; i++) {
-			state Transaction tr(cx, self->tenant);
+			state Transaction tr(cx, tenant);
 			loop {
 				try {
 					Standalone<KeyValueRef> kv =

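The reworked test above exercises quotas keyed by tenant group: once the group's total size exceeds the group quota, writes to every tenant in the group (including the empty one) should be rejected, and clearing the quota lifts the limit. A toy in-memory model of that expected behavior, with illustrative names and sizes; the real enforcement happens in the cluster, not in client code like this:

#include <cassert>
#include <cstdint>
#include <map>
#include <string>

// Toy model: quota is stored per group, and a write to ANY tenant of the group is rejected
// once the group's aggregate size exceeds the group quota.
struct GroupQuotaModel {
	std::map<std::string, std::string> tenantToGroup;
	std::map<std::string, int64_t> tenantSize;
	std::map<std::string, int64_t> groupQuota; // absent entry == no quota set

	bool writeAllowed(const std::string& tenant) const {
		const std::string& g = tenantToGroup.at(tenant);
		auto q = groupQuota.find(g);
		if (q == groupQuota.end())
			return true; // quota cleared -> writes commit
		int64_t groupTotal = 0;
		for (auto& [t, sz] : tenantSize)
			if (tenantToGroup.at(t) == g)
				groupTotal += sz;
		return groupTotal <= q->second;
	}
};

int main() {
	GroupQuotaModel m;
	m.tenantToGroup = { { "First", "GroupA" }, { "Second", "GroupA" } };
	m.tenantSize = { { "First", 1000 }, { "Second", 0 } };

	m.groupQuota["GroupA"] = 999; // just below the non-empty tenant's size
	assert(!m.writeAllowed("First"));
	assert(!m.writeAllowed("Second")); // the empty tenant is throttled too

	m.groupQuota.erase("GroupA"); // clearing the quota lifts the limit
	assert(m.writeAllowed("First") && m.writeAllowed("Second"));
}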
@@ -118,14 +118,14 @@ Arena::Arena(Arena&& r) noexcept = default;
 Arena& Arena::operator=(const Arena& r) = default;
 Arena& Arena::operator=(Arena&& r) noexcept = default;
 void Arena::dependsOn(const Arena& p) {
-	if (p.impl) {
+	// x.dependsOn(y) is a no-op if they refer to the same ArenaBlocks.
+	// They will already have the same lifetime.
+	if (p.impl && p.impl.getPtr() != impl.getPtr()) {
 		allowAccess(impl.getPtr());
 		allowAccess(p.impl.getPtr());
 		ArenaBlock::dependOn(impl, p.impl.getPtr());
 		disallowAccess(p.impl.getPtr());
-		if (p.impl.getPtr() != impl.getPtr()) {
-			disallowAccess(impl.getPtr());
-		}
+		disallowAccess(impl.getPtr());
 	}
 }
 
@@ -297,6 +297,7 @@ void* ArenaBlock::make4kAlignedBuffer(uint32_t size) {
 }
 
 void ArenaBlock::dependOn(Reference<ArenaBlock>& self, ArenaBlock* other) {
+	ASSERT(self->getData() != other->getData());
 	other->addref();
 	if (!self || self->isTiny() || self->unused() < sizeof(ArenaBlockRef))
 		create(SMALL, self)->makeReference(other);
@@ -775,6 +776,16 @@ TEST_CASE("/flow/Arena/Size") {
 	return Void();
 }
 
+// Test that x.dependsOn(x) works, and is effectively a no-op.
+TEST_CASE("/flow/Arena/SelfRef") {
+	Arena a(4096);
+
+	// This should be a no-op.
+	a.dependsOn(a);
+
+	return Void();
+}
+
 TEST_CASE("flow/StringRef/eat") {
 	StringRef str = "test/case"_sr;
 	StringRef first = str.eat("/");
@@ -815,4 +826,4 @@ TEST_CASE("flow/StringRef/eat") {
 	ASSERT(str == ""_sr);
 
 	return Void();
 }

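The guard added to Arena::dependsOn() above makes a self-dependency a no-op instead of recording a self-referencing ArenaBlockRef, which the new ASSERT in ArenaBlock::dependOn would now catch. A small usage sketch of the dependency pattern, assuming FDB's flow/Arena.h types:

#include "flow/Arena.h"

// Sketch only: 'a.dependsOn(b)' extends b's memory lifetime to at least a's, so data
// allocated in b stays valid while a is alive. Passing the same arena twice now does nothing.
void dependsOnSketch() {
	Arena a(4096);
	Arena b(4096);
	StringRef s(b, "payload"_sr); // memory owned by b
	a.dependsOn(b);               // s stays valid for as long as a lives
	a.dependsOn(a);               // no-op after this change; previously this could self-reference
	(void)s;
}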
@@ -29,21 +29,25 @@
 #define PRIORITYMULTILOCK_ACTOR_H
 
 #include "flow/flow.h"
+#include <boost/intrusive/list.hpp>
 #include "flow/actorcompiler.h" // This must be the last #include.
 
 #define PRIORITYMULTILOCK_DEBUG 0
 
 #if PRIORITYMULTILOCK_DEBUG || !defined(NO_INTELLISENSE)
 #define pml_debug_printf(...) \
-	if (now() > 0) \
-	printf(__VA_ARGS__)
+	if (now() > 0) { \
+		printf("pml line=%04d ", __LINE__); \
+		printf(__VA_ARGS__); \
+	}
 #else
 #define pml_debug_printf(...)
 #endif
 
 // A multi user lock with a concurrent holder limit where waiters request a lock with a priority
 // id and are granted locks based on a total concurrency and relative weights of the current active
-// priorities. Priority id's must start at 0 and are sequential integers.
+// priorities. Priority id's must start at 0 and are sequential integers. Priority id numbers
+// are not related to the importance of the priority in execution.
 //
 // Scheduling logic
 // Let
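The braces added to pml_debug_printf above keep the two printf calls together as a single statement when the macro is used under an if/else. A conventional alternative, shown only for comparison and not what this header does, is the do { ... } while (0) idiom:

#include <cstdio>

// Hypothetical macro using the do/while(0) form; it expands to exactly one statement,
// so it composes safely with a trailing ';' and with if/else around the call site.
#define DBG_PRINTF(...)                          \
	do {                                         \
		std::printf("dbg line=%04d ", __LINE__); \
		std::printf(__VA_ARGS__);                \
	} while (0)

int main() {
	if (true)
		DBG_PRINTF("hello %s\n", "world");
	else
		std::printf("never\n");
}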
@@ -64,17 +68,17 @@
 // The interface is similar to FlowMutex except that lock holders can just drop the lock to release it.
 //
 // Usage:
-//   Lock lock = wait(prioritylock.lock(priorityLevel));
+//   Lock lock = wait(prioritylock.lock(priority_id));
 //   lock.release();  // Explicit release, or
 //   // let lock and all copies of lock go out of scope to release
-class PriorityMultiLock {
+class PriorityMultiLock : public ReferenceCounted<PriorityMultiLock> {
 
 public:
 	// Waiting on the lock returns a Lock, which is really just a Promise<Void>
 	// Calling release() is not necessary, it exists in case the Lock holder wants to explicitly release
 	// the Lock before it goes out of scope.
 	struct Lock {
 		void release() { promise.send(Void()); }
+		bool isLocked() const { return promise.canBeSet(); }
+
 		// This is exposed in case the caller wants to use/copy it directly
 		Promise<Void> promise;
@@ -84,10 +88,11 @@ public:
 	  : PriorityMultiLock(concurrency, parseStringToVector<int>(weights, ',')) {}
 
 	PriorityMultiLock(int concurrency, std::vector<int> weightsByPriority)
-	  : concurrency(concurrency), available(concurrency), waiting(0), totalPendingWeights(0), releaseDebugID(0) {
+	  : concurrency(concurrency), available(concurrency), waiting(0), totalPendingWeights(0) {
 
 		priorities.resize(weightsByPriority.size());
 		for (int i = 0; i < priorities.size(); ++i) {
+			priorities[i].priority = i;
 			priorities[i].weight = weightsByPriority[i];
 		}
 
@@ -102,7 +107,8 @@ public:
 
 		// If this priority currently has no waiters
 		if (q.empty()) {
-			// Add this priority's weight to the total for priorities with pending work
+			// Add this priority's weight to the total for priorities with pending work. This must be done
+			// so that currentCapacity() below will assign capacity to this priority.
 			totalPendingWeights += p.weight;
 
 			// If there are slots available and the priority has capacity then don't make the caller wait
@@ -114,80 +120,69 @@ public:
 				Lock lock;
 				addRunner(lock, &p);
 
-				pml_debug_printf("lock nowait line %d priority %d %s\n", __LINE__, priority, toString().c_str());
+				pml_debug_printf("lock nowait priority %d %s\n", priority, toString().c_str());
 				return lock;
 			}
+
+			// If we didn't return above then add the priority to the waitingPriorities list
+			waitingPriorities.push_back(p);
 		}
 
-		Waiter w;
-		q.push_back(w);
+		Waiter& w = q.emplace_back();
 		++waiting;
 
-		pml_debug_printf("lock wait line %d priority %d %s\n", __LINE__, priority, toString().c_str());
+		pml_debug_printf("lock wait priority %d %s\n", priority, toString().c_str());
 		return w.lockPromise.getFuture();
 	}
 
 	void kill() {
+		pml_debug_printf("kill %s\n", toString().c_str());
 		brokenOnDestruct.reset();
 
 		// handleRelease will not free up any execution slots when it ends via cancel
 		fRunner.cancel();
 		available = 0;
-		runners.clear();
+		waitingPriorities.clear();
 		priorities.clear();
 	}
 
 	std::string toString() const {
-		int runnersDone = 0;
-		for (int i = 0; i < runners.size(); ++i) {
-			if (runners[i].isReady()) {
-				++runnersDone;
-			}
-		}
-
-		std::string s = format("{ ptr=%p concurrency=%d available=%d running=%d waiting=%d runnersQueue=%d "
-		                       "runnersDone=%d pendingWeights=%d ",
+		std::string s = format("{ ptr=%p concurrency=%d available=%d running=%d waiting=%d "
+		                       "pendingWeights=%d ",
 		                       this,
 		                       concurrency,
 		                       available,
 		                       concurrency - available,
 		                       waiting,
-		                       runners.size(),
-		                       runnersDone,
 		                       totalPendingWeights);
 
-		for (int i = 0; i < priorities.size(); ++i) {
-			s += format("p%d:{%s} ", i, priorities[i].toString(this).c_str());
+		for (auto& p : priorities) {
+			s += format("{%s} ", p.toString(this).c_str());
 		}
 
 		s += "}";
 
-		if (concurrency - available != runners.size() - runnersDone) {
-			pml_debug_printf("%s\n", s.c_str());
-			ASSERT_EQ(concurrency - available, runners.size() - runnersDone);
-		}
-
 		return s;
 	}
 
 	int maxPriority() const { return priorities.size() - 1; }
 
-	int totalWaiters() const { return waiting; }
-
-	int numWaiters(const unsigned int priority) const {
+	int getRunnersCount() const { return concurrency - available; }
+	int getWaitersCount() const { return waiting; }
+
+	int getWaitersCount(const unsigned int priority) const {
 		ASSERT(priority < priorities.size());
 		return priorities[priority].queue.size();
 	}
 
-	int totalRunners() const { return concurrency - available; }
-
-	int numRunners(const unsigned int priority) const {
+	int getRunnersCount(const unsigned int priority) const {
 		ASSERT(priority < priorities.size());
 		return priorities[priority].runners;
 	}
 
 private:
 	struct Waiter {
-		Waiter() {}
 		Promise<Lock> lockPromise;
 	};
 
@@ -202,8 +197,8 @@ private:
 
 	typedef Deque<Waiter> Queue;
 
-	struct Priority {
-		Priority() : runners(0), weight(0) {}
+	struct Priority : boost::intrusive::list_base_hook<> {
+		Priority() : runners(0), weight(0), priority(-1) {}
 
 		// Queue of waiters at this priority
 		Queue queue;
@@ -211,9 +206,12 @@ private:
 		int runners;
 		// Configured weight for this priority
 		int weight;
+		// Priority number for convenience, matches *this's index in PML priorities vector
+		int priority;
 
 		std::string toString(const PriorityMultiLock* pml) const {
-			return format("weight=%d run=%d wait=%d cap=%d",
+			return format("priority=%d weight=%d run=%d wait=%d cap=%d",
+			              priority,
 			              weight,
 			              runners,
 			              queue.size(),
@@ -222,51 +220,41 @@ private:
 	};
 
 	std::vector<Priority> priorities;
+	typedef boost::intrusive::list<Priority, boost::intrusive::constant_time_size<false>> WaitingPrioritiesList;
 
-	// Current or recent (ended) runners
-	Deque<Future<Void>> runners;
+	// List of all priorities with 1 or more waiters. This list exists so that the scheduling loop
+	// does not have to iterate over the priorities vector checking priorities without waiters.
+	WaitingPrioritiesList waitingPriorities;
 
 	Future<Void> fRunner;
 	AsyncTrigger wakeRunner;
 	Promise<Void> brokenOnDestruct;
 
-	// Used for debugging, can roll over without issue
-	unsigned int releaseDebugID;
-
-	ACTOR static Future<Void> handleRelease(PriorityMultiLock* self, Future<Void> f, Priority* priority) {
-		state [[maybe_unused]] unsigned int id = self->releaseDebugID++;
-
-		pml_debug_printf("%f handleRelease self=%p id=%u start \n", now(), self, id);
+	ACTOR static void handleRelease(Reference<PriorityMultiLock> self, Priority* priority, Future<Void> holder) {
+		pml_debug_printf("%f handleRelease self=%p start\n", now(), self.getPtr());
 		try {
-			wait(f);
-			pml_debug_printf("%f handleRelease self=%p id=%u success\n", now(), self, id);
+			wait(holder);
+			pml_debug_printf("%f handleRelease self=%p success\n", now(), self.getPtr());
 		} catch (Error& e) {
-			pml_debug_printf("%f handleRelease self=%p id=%u error %s\n", now(), self, id, e.what());
-			if (e.code() == error_code_actor_cancelled) {
-				throw;
-			}
+			pml_debug_printf("%f handleRelease self=%p error %s\n", now(), self.getPtr(), e.what());
 		}
 
-		pml_debug_printf("lock release line %d priority %d %s\n",
-		                 __LINE__,
-		                 (int)(priority - &self->priorities.front()),
-		                 self->toString().c_str());
+		pml_debug_printf("lock release priority %d %s\n", (int)(priority->priority), self->toString().c_str());
 
-		pml_debug_printf("%f handleRelease self=%p id=%u releasing\n", now(), self, id);
+		pml_debug_printf("%f handleRelease self=%p releasing\n", now(), self.getPtr());
 		++self->available;
 		priority->runners -= 1;
 
 		// If there are any waiters or if the runners array is getting large, trigger the runner loop
-		if (self->waiting > 0 || self->runners.size() > 1000) {
+		if (self->waiting > 0) {
 			self->wakeRunner.trigger();
 		}
-		return Void();
 	}
 
-	void addRunner(Lock& lock, Priority* p) {
-		p->runners += 1;
+	void addRunner(Lock& lock, Priority* priority) {
+		priority->runners += 1;
 		--available;
-		runners.push_back(handleRelease(this, lock.promise.getFuture(), p));
+		handleRelease(Reference<PriorityMultiLock>::addRef(this), priority, lock.promise.getFuture());
 	}
 
 	// Current maximum running tasks for the specified priority, which must have waiters
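waitingPriorities above is a boost::intrusive::list: each Priority carries the list hook itself, so linking a priority when its first waiter arrives and unlinking it when its queue drains costs no allocation and is O(1). A minimal standalone sketch of the same pattern with an illustrative Node type (not the header's types):

#include <boost/intrusive/list.hpp>
#include <cstdio>
#include <vector>

// Elements are owned elsewhere (here, a vector); the intrusive list just threads hooks through them.
struct Node : boost::intrusive::list_base_hook<> {
	int id = -1;
};

using NodeList = boost::intrusive::list<Node, boost::intrusive::constant_time_size<false>>;

int main() {
	std::vector<Node> storage(3);
	for (int i = 0; i < 3; ++i)
		storage[i].id = i;

	NodeList waiting;
	waiting.push_back(storage[0]);
	waiting.push_back(storage[2]); // only some elements are "waiting" at any time

	for (auto it = waiting.begin(); it != waiting.end();) {
		std::printf("waiting id=%d\n", it->id);
		it = waiting.erase(it); // O(1) unlink without destroying the element
	}
}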
@@ -278,76 +266,50 @@ private:
 	}
 
 	ACTOR static Future<Void> runner(PriorityMultiLock* self) {
-		state int sinceYield = 0;
 		state Future<Void> error = self->brokenOnDestruct.getFuture();
 
 		// Priority to try to run tasks from next
-		state int priority = 0;
+		state WaitingPrioritiesList::iterator p = self->waitingPriorities.end();
 
 		loop {
-			pml_debug_printf(
-			    "runner loop start line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
-
-			// Cleanup finished runner futures at the front of the runner queue.
-			while (!self->runners.empty() && self->runners.front().isReady()) {
-				self->runners.pop_front();
-			}
+			pml_debug_printf("runner loop start priority=%d %s\n", p->priority, self->toString().c_str());
 
 			// Wait for a runner to release its lock
-			pml_debug_printf(
-			    "runner loop waitTrigger line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
+			pml_debug_printf("runner loop waitTrigger priority=%d %s\n", p->priority, self->toString().c_str());
 			wait(self->wakeRunner.onTrigger());
-			pml_debug_printf(
-			    "%f runner loop wake line %d priority=%d %s\n", now(), __LINE__, priority, self->toString().c_str());
-
-			if (++sinceYield == 100) {
-				sinceYield = 0;
-				pml_debug_printf(
-				    "  runner waitDelay line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
-				wait(delay(0));
-				pml_debug_printf(
-				    "  runner afterDelay line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
-			}
+			pml_debug_printf("%f runner loop wake priority=%d %s\n", now(), p->priority, self->toString().c_str());
 
 			// While there are available slots and there are waiters, launch tasks
 			while (self->available > 0 && self->waiting > 0) {
-				pml_debug_printf(
-				    "  launch loop start line %d priority=%d %s\n", __LINE__, priority, self->toString().c_str());
-
-				Priority* pPriority;
+				pml_debug_printf("  launch loop start priority=%d %s\n", p->priority, self->toString().c_str());
 
 				// Find the next priority with waiters and capacity. There must be at least one.
 				loop {
-					// Rotate to next priority
-					if (++priority == self->priorities.size()) {
-						priority = 0;
+					if (p == self->waitingPriorities.end()) {
+						p = self->waitingPriorities.begin();
 					}
 
-					pPriority = &self->priorities[priority];
-
-					pml_debug_printf("    launch loop scan line %d priority=%d %s\n",
-					                 __LINE__,
-					                 priority,
-					                 self->toString().c_str());
+					pml_debug_printf("    launch loop scan priority=%d %s\n", p->priority, self->toString().c_str());
 
-					if (!pPriority->queue.empty() && pPriority->runners < self->currentCapacity(pPriority->weight)) {
+					if (!p->queue.empty() && p->runners < self->currentCapacity(p->weight)) {
 						break;
 					}
+					++p;
 				}
 
-				Queue& queue = pPriority->queue;
+				Queue& queue = p->queue;
 
 				Waiter w = queue.front();
 				queue.pop_front();
 
-				// If this priority is now empty, subtract its weight from the total pending weights
+				// If this priority is now empty, subtract its weight from the total pending weights and remove it
+				// from the waitingPriorities list
+				Priority* pPriority = &*p;
 				if (queue.empty()) {
+					p = self->waitingPriorities.erase(p);
 					self->totalPendingWeights -= pPriority->weight;
 
-					pml_debug_printf("    emptied priority line %d priority=%d %s\n",
-					                 __LINE__,
-					                 priority,
-					                 self->toString().c_str());
+					pml_debug_printf(
+					    "    emptied priority priority=%d %s\n", pPriority->priority, self->toString().c_str());
 				}
 
 				--self->waiting;
@@ -365,10 +327,9 @@ private:
 				self->addRunner(lock, pPriority);
 			}
 
-			pml_debug_printf("  launched line %d alreadyDone=%d priority=%d %s\n",
-			                 __LINE__,
+			pml_debug_printf("  launched alreadyDone=%d priority=%d %s\n",
 			                 !lock.promise.canBeSet(),
-			                 priority,
+			                 pPriority->priority,
 			                 self->toString().c_str());
 		}
 	}

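Since PriorityMultiLock is now ReferenceCounted and handleRelease holds a Reference to it, callers are expected to own it through makeReference rather than new/delete (the benchmark change further below does exactly that). A usage sketch following the header's documented pattern, assuming the flow actor compiler and illustrative concurrency/weights:

#include "flow/PriorityMultiLock.actor.h"
#include "flow/actorcompiler.h" // must stay the last #include in a .actor.cpp file

// Sketch only: hold the lock at a given priority id for the duration of some work.
ACTOR Future<Void> doPrioritizedWork(Reference<PriorityMultiLock> pml, int priorityId) {
	state PriorityMultiLock::Lock lock = wait(pml->lock(priorityId));
	// ... do the work that must respect the concurrency limit ...
	lock.release(); // optional; letting 'lock' go out of scope releases it too
	return Void();
}

Future<Void> example() {
	// 10 concurrent holders total, two priorities with weights 1 and 8 (values are illustrative).
	auto pml = makeReference<PriorityMultiLock>(10, std::vector<int>{ 1, 8 });
	return doPrioritizedWork(pml, 1);
}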
@@ -0,0 +1,180 @@
+/*
+ * BenchBlobDeltaFiles.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "benchmark/benchmark.h"
+#include "fdbclient/FDBTypes.h"
+#include "fdbclient/SystemData.h"
+#include "flow/IRandom.h"
+#include "flow/DeterministicRandom.h"
+
+#include "fdbclient/BlobGranuleFiles.h"
+#include "flow/flow.h"
+#include <cstdlib>
+#include <stdexcept>
+
+// Pre-generated GranuleDelta size in bytes for benchmark.
+const static int PRE_GEN_TARGET_BYTES[] = { 128 * 1024, 512 * 1024, 1024 * 1024 };
+
+// Generate GranuleDeltas in a deterministic way. Change the seed to test a new data set.
+class DeltaGenerator {
+public:
+	DeltaGenerator(uint32_t seed = 12345678) {
+		randGen = Reference<IRandom>(new DeterministicRandom(seed));
+		// Generate key range
+		prefix = StringRef(ar, randGen->randomUniqueID().toString() + "_");
+		range = KeyRangeRef(prefix, StringRef(ar, strinc(prefix)));
+		// Generate version jump size
+		minVersionJump = randGen->randomExp(0, 25);
+		maxVersionJump = minVersionJump + randGen->randomExp(0, 25);
+		// Generate value size range
+		maxValueSize = randGen->randomExp(7, 9);
+		// Generate start version
+		version = randGen->randomUInt32();
+		// Generate probability of updating existing keys
+		updateExistingKeysProb = randGen->random01();
+		// Generate deltas
+		for (auto i : PRE_GEN_TARGET_BYTES) {
+			genDeltas(i);
+		}
+
+		fmt::print("key range: {} - {}\n", range.begin.printable(), range.end.printable());
+		fmt::print("start version: {}\n", version);
+		fmt::print("max value bytes: {}\n", maxValueSize);
+		fmt::print("version jump range: {} - {}\n", minVersionJump, maxVersionJump);
+		fmt::print("probability for update: {}\n", updateExistingKeysProb);
+		fmt::print("unseed: {}\n", randGen->randomUInt32());
+	}
+
+	KeyRange getRange() { return range; }
+
+	Standalone<GranuleDeltas> getDelta(int targetBytes) {
+		if (deltas.find(targetBytes) != deltas.end()) {
+			return deltas[targetBytes];
+		}
+		throw std::invalid_argument("Test delta file size is not pre-generated!");
+	}
+
+private:
+	void genDeltas(int targetBytes) {
+		Standalone<GranuleDeltas> data;
+		int totalDataBytes = 0;
+		while (totalDataBytes < targetBytes) {
+			data.push_back(ar, newDelta());
+			totalDataBytes += data.back().expectedSize();
+		}
+		deltas[targetBytes] = data;
+	}
+
+	MutationRef newMutation() { return MutationRef(ar, MutationRef::SetValue, key(), value()); }
+
+	MutationsAndVersionRef newDelta() {
+		version += randGen->randomInt(minVersionJump, maxVersionJump);
+		MutationsAndVersionRef ret(version, version);
+		for (int i = 0; i < 10; i++) {
+			ret.mutations.push_back_deep(ar, newMutation());
+		}
+		return ret;
+	}
+
+	StringRef key() {
+		// Pick an existing key
+		if (randGen->random01() < updateExistingKeysProb && !usedKeys.empty()) {
+			int r = randGen->randomUInt32() % usedKeys.size();
+			auto it = usedKeys.begin();
+			for (; r != 0; r--)
+				it++;
+			return StringRef(ar, *it);
+		}
+
+		// Create a new key
+		std::string key = prefix.toString() + randGen->randomUniqueID().toString();
+		usedKeys.insert(key);
+		return StringRef(ar, key);
+	}
+
+	StringRef value() {
+		int valueSize = randGen->randomInt(maxValueSize / 2, maxValueSize * 3 / 2);
+		std::string value = randGen->randomUniqueID().toString();
+		if (value.size() > valueSize) {
+			value = value.substr(0, valueSize);
+		}
+		if (value.size() < valueSize) {
+			// repeated string so it's compressible
+			value += std::string(valueSize - value.size(), 'x');
+		}
+		return StringRef(ar, value);
+	}
+
+	Reference<IRandom> randGen;
+	Arena ar;
+	KeyRangeRef range;
+	Key prefix;
+	int maxValueSize;
+	Version version;
+	int minVersionJump;
+	int maxVersionJump;
+	std::set<std::string> usedKeys;
+	double updateExistingKeysProb;
+	std::map<int, Standalone<GranuleDeltas>> deltas;
+};
+
+static DeltaGenerator deltaGen; // Pre-generate deltas
+
+// Benchmark serialization without compression/encryption. The main CPU cost should be sortDeltasByKey
+static void bench_serialize_deltas(benchmark::State& state) {
+	int targetBytes = state.range(0);
+	int chunkSize = state.range(1);
+
+	Standalone<GranuleDeltas> delta = deltaGen.getDelta(targetBytes);
+	KeyRange range = deltaGen.getRange();
+
+	Standalone<StringRef> fileName = "testdelta"_sr; // unused
+	Optional<CompressionFilter> compressFilter; // unused. no compression
+	Optional<BlobGranuleCipherKeysCtx> cipherKeysCtx; // unused. no encryption
+
+	uint32_t serializedBytes = 0;
+	for (auto _ : state) {
+		Value serialized = serializeChunkedDeltaFile(fileName, delta, range, chunkSize, compressFilter, cipherKeysCtx);
+		serializedBytes += serialized.size();
+	}
+	state.SetBytesProcessed(static_cast<long>(state.iterations()) * targetBytes);
+	state.counters["serialized_bytes"] = serializedBytes;
+}
+
+// Benchmark sorting deltas
+static void bench_sort_deltas(benchmark::State& state) {
+	int targetBytes = state.range(0);
+	Standalone<GranuleDeltas> delta = deltaGen.getDelta(targetBytes);
+	KeyRange range = deltaGen.getRange();
+
+	for (auto _ : state) {
+		sortDeltasByKey(delta, range);
+	}
+	state.SetBytesProcessed(static_cast<long>(state.iterations()) * targetBytes);
+}
+
+// Benchmark serialization for granule deltas 128KB, 512KB and 1024KB. Chunk size 32KB
+BENCHMARK(bench_serialize_deltas)
+    ->Args({ 128 * 1024, 32 * 1024 })
+    ->Args({ 512 * 1024, 32 * 1024 })
+    ->Args({ 1024 * 1024, 32 * 1024 });
+
+// Benchmark sorting for granule deltas 128KB, 512KB and 1024KB. Chunk size 32KB
+BENCHMARK(bench_sort_deltas)->Args({ 128 * 1024 })->Args({ 512 * 1024 })->Args({ 1024 * 1024 });
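In google-benchmark, each ->Args({a, b}) call above registers one instance of the benchmark, and the values are read back inside the body as state.range(0) and state.range(1). A tiny self-contained illustration of that registration pattern; the benchmark body here is a trivial stand-in, not the delta-file code, and it assumes the usual benchmark main/harness is linked in:

#include "benchmark/benchmark.h"
#include <algorithm>
#include <vector>

// Each Args tuple becomes one benchmark instance: (targetBytes, chunkSize) here mirrors the
// argument layout used by bench_serialize_deltas above.
static void bench_copy_bytes(benchmark::State& state) {
	const int targetBytes = state.range(0);
	const int chunkSize = state.range(1);
	std::vector<char> src(targetBytes, 'x'), dst(targetBytes);
	for (auto _ : state) {
		for (int off = 0; off < targetBytes; off += chunkSize)
			std::copy_n(src.data() + off, std::min(chunkSize, targetBytes - off), dst.data() + off);
		benchmark::DoNotOptimize(dst.data());
	}
	state.SetBytesProcessed(static_cast<long>(state.iterations()) * targetBytes);
}

BENCHMARK(bench_copy_bytes)->Args({ 128 * 1024, 32 * 1024 })->Args({ 512 * 1024, 32 * 1024 });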
@@ -25,26 +25,28 @@
 #include "flow/PriorityMultiLock.actor.h"
 #include <deque>
 #include "flow/actorcompiler.h" // This must be the last #include.
+#include "fmt/printf.h"
 
 ACTOR static Future<Void> benchPriorityMultiLock(benchmark::State* benchState) {
-	state std::vector<int> priorities;
+	// Arg1 is the number of active priorities to use
+	// Arg2 is the number of inactive priorities to use
+	state int active = benchState->range(0);
+	state int inactive = benchState->range(1);
+
 	// Set up priority list with limits 10, 20, 30, ...
-	while (priorities.size() < benchState->range(0)) {
+	state std::vector<int> priorities;
+	while (priorities.size() < active + inactive) {
 		priorities.push_back(10 * (priorities.size() + 1));
 	}
 
 	state int concurrency = priorities.size() * 10;
-	state PriorityMultiLock* pml = new PriorityMultiLock(concurrency, priorities);
-	state std::vector<int> counts;
-	counts.resize(priorities.size(), 0);
+	state Reference<PriorityMultiLock> pml = makeReference<PriorityMultiLock>(concurrency, priorities);
 
-	// Clog the lock buy taking concurrency locks
+	// Clog the lock by taking n=concurrency locks
 	state std::deque<Future<PriorityMultiLock::Lock>> lockFutures;
 	for (int j = 0; j < concurrency; ++j) {
-		lockFutures.push_back(pml->lock(j % priorities.size()));
+		lockFutures.push_back(pml->lock(j % active));
 	}
 
 	// Wait for all of the initial locks to be taken
 	// This will work regardless of their priorities as there are only n = concurrency of them
 	wait(waitForAll(std::vector<Future<PriorityMultiLock::Lock>>(lockFutures.begin(), lockFutures.end())));
@@ -64,7 +66,7 @@ ACTOR static Future<Void> benchPriorityMultiLock(benchmark::State* benchState) {
 	PriorityMultiLock::Lock lock = wait(f);
 
 	// Rotate to another priority
-	if (++p == priorities.size()) {
+	if (++p == active) {
 		p = 0;
 	}
 
@@ -76,7 +78,6 @@ ACTOR static Future<Void> benchPriorityMultiLock(benchmark::State* benchState) {
 
 	benchState->SetItemsProcessed(static_cast<long>(benchState->iterations()));
 
-	delete pml;
 	return Void();
 }
 
@@ -84,4 +85,4 @@ static void bench_priorityMultiLock(benchmark::State& benchState) {
 	onMainThread([&benchState]() { return benchPriorityMultiLock(&benchState); }).blockUntilReady();
 }
 
-BENCHMARK(bench_priorityMultiLock)->DenseRange(1, 8)->ReportAggregatesOnly(true);
+BENCHMARK(bench_priorityMultiLock)->Args({ 5, 0 })->Ranges({ { 1, 64 }, { 0, 128 } })->ReportAggregatesOnly(true);

@@ -334,9 +334,6 @@ logdir = {logdir}
             db_config += " blob_granules_enabled:=1"
         self.fdbcli_exec(db_config)
 
-        if self.blob_granules_enabled:
-            self.fdbcli_exec("blobrange start \\x00 \\xff")
-
     # Generate and install test certificate chains and keys
     def create_tls_cert(self):
         assert self.tls_config is not None, "TLS not enabled"

@@ -6,6 +6,7 @@ enable_encryption = true
 enable_tlog_encryption = true
 enable_storage_server_encryption = false
 enable_blob_granule_encryption = true
+max_write_transaction_life_versions = 5000000
 
 [[test]]
 testTitle = 'EncryptedBackupAndRestore'

@@ -8,20 +8,36 @@ testTitle = 'TenantCreation'
     [[test.workload]]
     testName = 'CreateTenant'
     name = 'First'
+    group = 'GroupA'
 
     [[test.workload]]
     testName = 'CreateTenant'
     name = 'Second'
+    group = 'GroupA'
+
+    [[test.workload]]
+    testName = 'CreateTenant'
+    name = 'Third'
+    group = 'GroupB'
+
+    [[test.workload]]
+    testName = 'CreateTenant'
+    name = 'Fourth'
+    group = 'GroupB'
 
 [[test]]
 testTitle = 'StorageQuota'
 
     [[test.workload]]
     testName = 'StorageQuota'
+    group = 'GroupA'
     tenant = 'First'
     nodeCount = 250000
+    emptyTenant = 'Second'
 
     [[test.workload]]
     testName = 'StorageQuota'
-    tenant = 'Second'
+    group = 'GroupB'
+    tenant = 'Third'
     nodeCount = 25000
+    emptyTenant = 'Fourth'