adding logic to disable splitting within a truncated tuple, and validating it in test (#10106)
This commit is contained in:
parent
7e872c4a59
commit
22155c84f4
|
@ -1085,6 +1085,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( BG_CONSISTENCY_CHECK_ENABLED, true ); if (randomize && BUGGIFY) BG_CONSISTENCY_CHECK_ENABLED = false;
|
||||
init( BG_CONSISTENCY_CHECK_TARGET_SPEED_KB, 1000 ); if (randomize && BUGGIFY) BG_CONSISTENCY_CHECK_TARGET_SPEED_KB *= (deterministicRandom()->randomInt(2, 50) / 10);
|
||||
init( BG_KEY_TUPLE_TRUNCATE_OFFSET, 0 );
|
||||
init( BG_ENABLE_SPLIT_TRUNCATED, false ); if (randomize && BUGGIFY) BG_ENABLE_SPLIT_TRUNCATED = true;
|
||||
init( BG_ENABLE_READ_DRIVEN_COMPACTION, true ); if (randomize && BUGGIFY) BG_ENABLE_READ_DRIVEN_COMPACTION = false;
|
||||
init( BG_RDC_BYTES_FACTOR, 2 ); if (randomize && BUGGIFY) BG_RDC_BYTES_FACTOR = deterministicRandom()->randomInt(1, 10);
|
||||
init( BG_RDC_READ_FACTOR, 3 ); if (randomize && BUGGIFY) BG_RDC_READ_FACTOR = deterministicRandom()->randomInt(1, 10);
|
||||
|
|
|
@ -1092,6 +1092,7 @@ public:
|
|||
int BG_MERGE_CANDIDATE_THRESHOLD_SECONDS;
|
||||
int BG_MERGE_CANDIDATE_DELAY_SECONDS;
|
||||
int BG_KEY_TUPLE_TRUNCATE_OFFSET;
|
||||
bool BG_ENABLE_SPLIT_TRUNCATED;
|
||||
bool BG_ENABLE_READ_DRIVEN_COMPACTION;
|
||||
int BG_RDC_BYTES_FACTOR;
|
||||
int BG_RDC_READ_FACTOR;
|
||||
|
|
|
@ -578,12 +578,14 @@ static void alignKeyBoundary(Reference<BlobManagerData> bmData,
|
|||
alignedKey = alignedKey.withPrefix(tenantData->entry.prefix, keys.arena());
|
||||
}
|
||||
|
||||
// Only add the alignedKey if it's larger than the last key. If it's the same, drop the split.
|
||||
// Only add the alignedKey if it's larger than the last key. If it's the same, drop the split if not allowed.
|
||||
if (alignedKey <= keys.back()) {
|
||||
// Set split boundary.
|
||||
BlobGranuleMergeBoundary boundary = { /*buddy=*/true };
|
||||
boundaries[key] = boundary;
|
||||
keys.push_back_deep(keys.arena(), key);
|
||||
if (SERVER_KNOBS->BG_ENABLE_SPLIT_TRUNCATED) {
|
||||
// Set split boundary.
|
||||
BlobGranuleMergeBoundary boundary = { /*buddy=*/true };
|
||||
boundaries[key] = boundary;
|
||||
keys.push_back_deep(keys.arena(), key);
|
||||
} // else drop the split
|
||||
} else {
|
||||
keys.push_back_deep(keys.arena(), alignedKey);
|
||||
}
|
||||
|
@ -1596,8 +1598,6 @@ ACTOR Future<Void> reevaluateInitialSplit(Reference<BlobManagerData> bmData,
|
|||
// FIXME: only need to align proposedSplitKey in the middle
|
||||
state BlobGranuleSplitPoints finalSplit = wait(alignKeys(bmData, granuleRange, newRanges));
|
||||
|
||||
ASSERT(finalSplit.keys.size() > 2);
|
||||
|
||||
if (BM_DEBUG) {
|
||||
fmt::print("Aligned split ({0}):\n", finalSplit.keys.size());
|
||||
for (auto& it : finalSplit.keys) {
|
||||
|
@ -1605,6 +1605,8 @@ ACTOR Future<Void> reevaluateInitialSplit(Reference<BlobManagerData> bmData,
|
|||
}
|
||||
}
|
||||
|
||||
ASSERT(finalSplit.keys.size() > 2);
|
||||
|
||||
// Check lock to see if lock is still the specified epoch and seqno, and there are no files for the granule.
|
||||
// If either of these is false, some other worker now has the granule. If there are files, it already succeeded at
|
||||
// a split. If not, and it fails too, it will retry and get back here.
|
||||
|
|
|
@ -941,13 +941,15 @@ ACTOR Future<BlobFileIndex> writeSnapshot(Reference<BlobWorkerData> bwData,
|
|||
state std::string fileName = randomBGFilename(bwData->id, granuleID, version, ".snapshot");
|
||||
state Standalone<GranuleSnapshot> snapshot;
|
||||
state int64_t bytesRead = 0;
|
||||
state bool canStopEarly =
|
||||
(SERVER_KNOBS->BG_KEY_TUPLE_TRUNCATE_OFFSET == 0 || SERVER_KNOBS->BG_ENABLE_SPLIT_TRUNCATED);
|
||||
state bool injectTooBig = initialSnapshot && g_network->isSimulated() && BUGGIFY_WITH_PROB(0.1);
|
||||
|
||||
wait(delay(0, TaskPriority::BlobWorkerUpdateStorage));
|
||||
|
||||
loop {
|
||||
try {
|
||||
if (initialSnapshot && snapshot.size() > 1 &&
|
||||
if (initialSnapshot && snapshot.size() > 1 && canStopEarly &&
|
||||
(injectTooBig || bytesRead >= 3 * SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES)) {
|
||||
// throw transaction too old either on injection for simulation, or if snapshot would be too large now
|
||||
throw transaction_too_old();
|
||||
|
|
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
* BlobGranuleMergeBoundariesWorkload.actor.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2023 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/TenantManagement.actor.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbserver/workloads/BulkSetup.actor.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/Trace.h"
|
||||
#include "flow/Util.h"
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
/*
|
||||
* Verifies the configured behavior for splitting within a tuple prefix when
|
||||
* bg_key_tuple_truncate_offset=1. Writes several normal granules' worth of data to one tuple prefix, and validates
|
||||
* that splitting within that tuple prefix is allowed or disallowed as configured.
|
||||
*/
|
||||
struct BlobGranuleMergeBoundariesWorkload : TestWorkload {
|
||||
static constexpr auto NAME = "BlobGranuleMergeBoundaries";
|
||||
int targetGranules;
|
||||
bool initAfter;
|
||||
int nodeCount;
|
||||
int targetValueLen;
|
||||
|
||||
Optional<TenantName> tenantName;
|
||||
Optional<Reference<Tenant>> tenant;
|
||||
|
||||
BlobGranuleMergeBoundariesWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
||||
targetGranules = 3 + sharedRandomNumber % 6;
|
||||
sharedRandomNumber /= 6;
|
||||
initAfter = (sharedRandomNumber % 4) == 0;
|
||||
sharedRandomNumber /= 4;
|
||||
targetValueLen = 100 * (1 + sharedRandomNumber % 10);
|
||||
sharedRandomNumber /= 10;
|
||||
|
||||
int64_t targetBytes = targetGranules * SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES;
|
||||
targetBytes = std::max<int64_t>(
|
||||
1000000, targetBytes); // write at least 1 MB to avoid very small granule/byte sample issues
|
||||
nodeCount = (int)(targetBytes / targetValueLen);
|
||||
|
||||
tenantName = "bgMergeBoundsTenant"_sr;
|
||||
|
||||
// FIXME: maybe enable for completeness at some point? We probably will never convert non-empty ranges to blob
|
||||
// after 71.3
|
||||
initAfter = false;
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesWorkloadInit")
|
||||
.detail("TargetGranules", targetGranules)
|
||||
.detail("InitAfter", initAfter)
|
||||
.detail("TargetValSize", targetValueLen)
|
||||
.detail("TargetBytes", targetBytes)
|
||||
.detail("GranuleSize", SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES)
|
||||
.detail("NodeCount", nodeCount);
|
||||
}
|
||||
|
||||
Future<Void> setup(Database const& cx) override { return _setup(cx, this); }
|
||||
|
||||
ACTOR Future<Void> setUpBlobRange(Database cx, BlobGranuleMergeBoundariesWorkload* self) {
|
||||
bool success = wait(cx->blobbifyRange(normalKeys, self->tenant));
|
||||
ASSERT(success);
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Functions required by `bulkSetup()`
|
||||
// key is always a 2-tuple with the same first element and a different last element
|
||||
Key keyForIndex(int n) { return Tuple::makeTuple(7, n).pack(); }
|
||||
Value value(int n) {
|
||||
// FIXME: shared with BlobGranuleCorrectnessWorkload
|
||||
int valLen = deterministicRandom()->randomInt(1, 2 * targetValueLen);
|
||||
valLen = std::max(10, valLen);
|
||||
std::string v(valLen, 'z');
|
||||
auto valFormatted = format("%08x", n);
|
||||
ASSERT(valFormatted.size() <= v.size());
|
||||
|
||||
for (int i = 0; i < valFormatted.size(); i++) {
|
||||
v[i] = valFormatted[i];
|
||||
}
|
||||
// copy into an arena
|
||||
// TODO do this in original arena? a bit more efficient that way
|
||||
Arena a;
|
||||
return Standalone<StringRef>(StringRef(a, v), a);
|
||||
}
|
||||
Standalone<KeyValueRef> operator()(int n) { return KeyValueRef(keyForIndex(n), value(n)); }
|
||||
|
||||
ACTOR Future<Void> _setup(Database cx, BlobGranuleMergeBoundariesWorkload* self) {
|
||||
if (self->clientId != 0) {
|
||||
return Void();
|
||||
}
|
||||
TraceEvent("BlobGranuleMergeBoundariesInit")
|
||||
.detail("TargetGranules", self->targetGranules)
|
||||
.detail("InitAfter", self->initAfter);
|
||||
|
||||
// set up blob granules
|
||||
wait(success(ManagementAPI::changeConfig(cx.getReference(), "blob_granules_enabled=1", true)));
|
||||
|
||||
Optional<TenantMapEntry> entry = wait(TenantAPI::createTenant(cx.getReference(), self->tenantName.get()));
|
||||
ASSERT(entry.present());
|
||||
self->tenant = makeReference<Tenant>(cx, self->tenantName.get());
|
||||
|
||||
if (!self->initAfter) {
|
||||
wait(self->setUpBlobRange(cx, self));
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesSetupVerifying");
|
||||
loop {
|
||||
Version checkVersion = wait(cx->verifyBlobRange(normalKeys, latestVersion, self->tenant));
|
||||
if (checkVersion != -1) {
|
||||
break;
|
||||
}
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesSetupVerifyRetrying");
|
||||
|
||||
wait(delay(1.0));
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesLoading");
|
||||
|
||||
// we only have one client and bulk setup divides the writes amongst them, so multiply node count by client
|
||||
// count
|
||||
wait(bulkSetup(cx,
|
||||
self,
|
||||
self->nodeCount * self->clientCount,
|
||||
Promise<double>(),
|
||||
true,
|
||||
0.0,
|
||||
1e12,
|
||||
std::vector<uint64_t>(),
|
||||
Promise<std::vector<std::pair<uint64_t, double>>>(),
|
||||
0,
|
||||
0.1,
|
||||
0,
|
||||
0,
|
||||
{ self->tenant.get() }));
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesLoadingComplete");
|
||||
|
||||
if (self->initAfter) {
|
||||
wait(self->setUpBlobRange(cx, self));
|
||||
}
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesSetupComplete");
|
||||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> start(Database const& cx) override {
|
||||
// no test phase
|
||||
return Void();
|
||||
}
|
||||
|
||||
Future<bool> check(Database const& cx) override { return _check(cx, this); }
|
||||
|
||||
ACTOR Future<bool> _check(Database cx, BlobGranuleMergeBoundariesWorkload* self) {
|
||||
if (self->clientId != 0) {
|
||||
return true;
|
||||
}
|
||||
state Key tuplePrefix = Tuple::makeTuple(7).pack();
|
||||
// FIXME: checking normalKeys finds another empty granule, that's metadata overhead we should fix at some point
|
||||
state KeyRange tupleRange(KeyRangeRef(tuplePrefix, strinc(tuplePrefix)));
|
||||
TraceEvent("BlobGranuleMergeBoundariesCheckStart").detail("Range", tupleRange);
|
||||
loop {
|
||||
Version checkVersion = wait(cx->verifyBlobRange(tupleRange, latestVersion, self->tenant));
|
||||
if (checkVersion != -1) {
|
||||
TraceEvent("BlobGranuleMergeBoundariesCheckRead").detail("CheckVersion", checkVersion);
|
||||
break;
|
||||
}
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesCheckRetrying");
|
||||
|
||||
wait(delay(1.0));
|
||||
}
|
||||
|
||||
state Transaction tr(cx, self->tenant);
|
||||
loop {
|
||||
try {
|
||||
Standalone<VectorRef<KeyRangeRef>> granules = wait(tr.getBlobGranuleRanges(tupleRange, 1000000));
|
||||
TraceEvent("BlobGranuleMergeBoundariesCheckGranules")
|
||||
.detail("GranuleCount", granules.size())
|
||||
.detail("EnableSplitTruncated", SERVER_KNOBS->BG_ENABLE_SPLIT_TRUNCATED)
|
||||
.detail("TruncateOffset", SERVER_KNOBS->BG_KEY_TUPLE_TRUNCATE_OFFSET);
|
||||
if (SERVER_KNOBS->BG_ENABLE_SPLIT_TRUNCATED) {
|
||||
// test the test to ensure in the case where this knob wasn't set, we would be producing multiple
|
||||
// granules
|
||||
// FIXME: sometimes behind granule resnapshotting means we still only have one granule so we can't
|
||||
// assert > 1
|
||||
ASSERT(granules.size() >= 1);
|
||||
} else {
|
||||
ASSERT(granules.size() == 1);
|
||||
}
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("BlobGranuleMergeBoundariesCheckDone");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void getMetrics(std::vector<PerfMetric>& m) override {}
|
||||
};
|
||||
|
||||
// File-scope factory instance for BlobGranuleMergeBoundariesWorkload.
// NOTE(review): presumably this is what makes the workload selectable by its NAME from test specs — confirm
// against WorkloadFactory.
WorkloadFactory<BlobGranuleMergeBoundariesWorkload> BlobGranuleMergeBoundariesWorkloadFactory;
|
|
@ -244,6 +244,7 @@ if(WITH_PYTHON)
|
|||
endif()
|
||||
|
||||
add_fdb_test(TEST_FILES rare/BlobGranuleRanges.toml)
|
||||
add_fdb_test(TEST_FILES rare/BlobGranuleMergeBoundaries.toml)
|
||||
add_fdb_test(TEST_FILES rare/CheckRelocation.toml)
|
||||
add_fdb_test(TEST_FILES rare/ClogTlog.toml)
|
||||
add_fdb_test(TEST_FILES rare/ClogUnclog.toml)
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# This test is lower value than the other blob granule tests (it's essentially a unit test), so run it less frequently.
|
||||
testPriority = '10'
|
||||
|
||||
[configuration]
|
||||
blobGranulesEnabled = true
|
||||
allowDefaultTenant = false
|
||||
injectTargetedSSRestart = true
|
||||
injectSSDelay = true
|
||||
tenantModes = ['required']
|
||||
|
||||
[[knobs]]
|
||||
bg_key_tuple_truncate_offset = 1
|
||||
bg_metadata_source = "tenant"
|
||||
enable_rest_kms_communication = true
|
||||
|
||||
[[test]]
|
||||
testTitle = 'BlobGranuleMergeBoundaries'
|
||||
|
||||
[[test.workload]]
|
||||
testName = 'BlobGranuleMergeBoundaries'
|
||||
|
Loading…
Reference in New Issue