Merge branch 'main' into feature-metacluster
commit 7c6b3fb0b8
@@ -765,6 +765,71 @@ JNIEXPORT jdouble JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1getM
	return (jdouble)fdb_database_get_main_thread_busyness(database);
}

JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1purgeBlobGranules(JNIEnv* jenv,
                                                                                            jobject,
                                                                                            jlong dbPtr,
                                                                                            jbyteArray beginKeyBytes,
                                                                                            jbyteArray endKeyBytes,
                                                                                            jlong purgeVersion,
                                                                                            jboolean force) {
	if (!dbPtr || !beginKeyBytes || !endKeyBytes) {
		throwParamNotNull(jenv);
		return 0;
	}

	FDBDatabase* database = (FDBDatabase*)dbPtr;

	uint8_t* beginKeyArr = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
	if (!beginKeyArr) {
		if (!jenv->ExceptionOccurred())
			throwRuntimeEx(jenv, "Error getting handle to native resources");
		return 0;
	}

	uint8_t* endKeyArr = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
	if (!endKeyArr) {
		jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
		if (!jenv->ExceptionOccurred())
			throwRuntimeEx(jenv, "Error getting handle to native resources");
		return 0;
	}

	FDBFuture* f = fdb_database_purge_blob_granules(database,
	                                                beginKeyArr,
	                                                jenv->GetArrayLength(beginKeyBytes),
	                                                endKeyArr,
	                                                jenv->GetArrayLength(endKeyBytes),
	                                                purgeVersion,
	                                                (fdb_bool_t)force);
	jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
	jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKeyArr, JNI_ABORT);
	return (jlong)f;
}

JNIEXPORT jlong JNICALL
Java_com_apple_foundationdb_FDBDatabase_Database_1waitPurgeGranulesComplete(JNIEnv* jenv,
                                                                            jobject,
                                                                            jlong dbPtr,
                                                                            jbyteArray purgeKeyBytes) {
	if (!dbPtr || !purgeKeyBytes) {
		throwParamNotNull(jenv);
		return 0;
	}
	FDBDatabase* database = (FDBDatabase*)dbPtr;
	uint8_t* purgeKeyArr = (uint8_t*)jenv->GetByteArrayElements(purgeKeyBytes, JNI_NULL);

	if (!purgeKeyArr) {
		if (!jenv->ExceptionOccurred())
			throwRuntimeEx(jenv, "Error getting handle to native resources");
		return 0;
	}
	FDBFuture* f =
	    fdb_database_wait_purge_granules_complete(database, purgeKeyArr, jenv->GetArrayLength(purgeKeyBytes));
	jenv->ReleaseByteArrayElements(purgeKeyBytes, (jbyte*)purgeKeyArr, JNI_ABORT);

	return (jlong)f;
}

JNIEXPORT jboolean JNICALL Java_com_apple_foundationdb_FDB_Error_1predicate(JNIEnv* jenv,
                                                                            jobject,
                                                                            jint predicate,
@@ -161,6 +161,24 @@ public interface Database extends AutoCloseable, TransactionContext {
	 */
	double getMainThreadBusyness();

	/**
	 * Queues a purge of blob granules for the specified key range, at the specified version.
	 *
	 * @param beginKey start of the key range
	 * @param endKey end of the key range
	 * @param purgeVersion version to purge at
	 * @param force if true, delete all data in the range; if false, keep data at versions >= purgeVersion
	 * @return the key to watch for purge completion
	 */
	CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force, Executor e);

	/**
	 * Waits for a previous call to purgeBlobGranules to complete.
	 *
	 * @param purgeKey key to watch
	 */
	CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey, Executor e);

	/**
	 * Runs a read-only transactional function against this {@code Database} with retry logic.
	 * {@link Function#apply(Object) apply(ReadTransaction)} will be called on the
@@ -200,6 +200,26 @@ class FDBDatabase extends NativeObjectWrapper implements Database, OptionConsume
		}
	}

	@Override
	public CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force, Executor executor) {
		pointerReadLock.lock();
		try {
			return new FutureKey(Database_purgeBlobGranules(getPtr(), beginKey, endKey, purgeVersion, force), executor, eventKeeper);
		} finally {
			pointerReadLock.unlock();
		}
	}

	@Override
	public CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey, Executor executor) {
		pointerReadLock.lock();
		try {
			return new FutureVoid(Database_waitPurgeGranulesComplete(getPtr(), purgeKey), executor);
		} finally {
			pointerReadLock.unlock();
		}
	}

	@Override
	public Executor getExecutor() {
		return executor;

@@ -215,4 +235,6 @@ class FDBDatabase extends NativeObjectWrapper implements Database, OptionConsume
	private native void Database_dispose(long cPtr);
	private native void Database_setOption(long cPtr, int code, byte[] value) throws FDBException;
	private native double Database_getMainThreadBusyness(long cPtr);
	private native long Database_purgeBlobGranules(long cPtr, byte[] beginKey, byte[] endKey, long purgeVersion, boolean force);
	private native long Database_waitPurgeGranulesComplete(long cPtr, byte[] purgeKey);
}
@@ -666,14 +666,6 @@ def tenants(logger):
    assert len(lines) == 4
    assert lines[3].strip() == 'tenant group: tenant_group1'

    output = run_fdbcli_command('configuretenant tenant tenant_group=tenant_group1 tenant_group=tenant_group2')
    assert output == 'The configuration for tenant `tenant\' has been updated'

    output = run_fdbcli_command('gettenant tenant')
    lines = output.split('\n')
    assert len(lines) == 4
    assert lines[3].strip() == 'tenant group: tenant_group2'

    output = run_fdbcli_command('configuretenant tenant unset tenant_group')
    assert output == 'The configuration for tenant `tenant\' has been updated'

@@ -68,6 +68,7 @@ if(WIN32)
  add_definitions(-DBOOST_USE_WINDOWS_H)
  add_definitions(-DWIN32_LEAN_AND_MEAN)
  add_definitions(-D_ITERATOR_DEBUG_LEVEL=0)
  add_definitions(-DNOGDI) # WinGDI.h defines macro ERROR
endif()

if (USE_CCACHE)

@@ -8,6 +8,8 @@ add_subdirectory(rapidxml)
add_subdirectory(sqlite)
add_subdirectory(SimpleOpt)
add_subdirectory(fmt-8.1.1)
add_subdirectory(md5)
add_subdirectory(libb64)
if(NOT WIN32)
  add_subdirectory(linenoise)
  add_subdirectory(debug_determinism)

@@ -1,2 +1,8 @@
add_library(crc32 STATIC crc32.S crc32_wrapper.c crc32c.cpp)
if (CLANG)
  # This is necessary for clang since the compiler reports that crc32_align is
  # defined but not used. With -Werror, crc32 will not compile.
  # TODO: Remove this when the upstream issue is repaired.
  target_compile_options(crc32 PUBLIC -Wno-unused-function)
endif ()
target_include_directories(crc32 PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
#
# ddsketch_calc.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np
import math as m


# Implements a DDSketch class as described in:
# https://arxiv.org/pdf/1908.10693.pdf

# This class has methods that use cubic interpolation to quickly compute log
# and inverse log. The coefficients A, B, C as well as correctingFactor are
# all constants used for interpolating.

# The interpolation implementation was originally seen in:
# https://github.com/DataDog/sketches-java/
# in the file CubicallyInterpolatedMapping.java

class DDSketch(object):
    A = 6.0 / 35.0
    B = -3.0 / 5.0
    C = 10.0 / 7.0
    EPS = 1e-18
    correctingFactor = 1.00988652862227438516
    offset = 0
    multiplier = 0
    gamma = 0

    def __init__(self, errorGuarantee):
        self.gamma = (1 + errorGuarantee) / (1 - errorGuarantee)
        self.multiplier = (self.correctingFactor * m.log(2)) / m.log(self.gamma)
        self.offset = self.getIndex(1.0 / self.EPS)

    def fastlog(self, value):
        s = np.frexp(value)
        e = s[1]
        s = s[0]
        s = s * 2 - 1
        return ((self.A * s + self.B) * s + self.C) * s + e - 1

    def reverseLog(self, index):
        exponent = m.floor(index)
        d0 = self.B * self.B - 3 * self.A * self.C
        d1 = 2 * self.B * self.B * self.B - 9 * self.A * self.B * self.C - 27 * self.A * self.A * (index - exponent)
        p = np.cbrt((d1 - np.sqrt(d1 * d1 - 4 * d0 * d0 * d0)) / 2)
        significandPlusOne = -(self.B + p + d0 / p) / (3 * self.A) + 1
        return np.ldexp(significandPlusOne / 2, exponent + 1)

    def getIndex(self, sample):
        return m.ceil(self.fastlog(sample) * self.multiplier) + self.offset

    def getValue(self, idx):
        return self.reverseLog((idx - self.offset) / self.multiplier) * 2.0 / (1 + self.gamma)
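A minimal usage sketch for the class above, assuming ddsketch_calc.py is importable (0.005 is the errorGuarantee passed to the constructor). Mapping a sample to its bucket with getIndex() and back with getValue() should round-trip to within the configured relative error:

import ddsketch_calc as dd

sketch = dd.DDSketch(0.005)  # 0.5% relative-error guarantee
for v in [1.0, 250.0, 1e6]:
    idx = sketch.getIndex(v)       # bucket index for the sample
    approx = sketch.getValue(idx)  # representative value for that bucket
    print(v, idx, approx, abs(approx - v) / v)  # relative error should stay at or below ~0.005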
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
#
# ddsketch_compare.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import json
import numpy as np


# Kullback-Leibler divergence (or relative entropy)
def relative_entropy(p, q):
    difference = 0.0
    for i in range(len(p)):
        if p[i] != 0.0 and q[i] != 0.0:
            difference += (p[i] * np.log2(p[i] / q[i]))
    return difference


# Jensen-Shannon divergence (or symmetric relative entropy)
def relative_entropy_symmetric(dd1, dd2):
    # normalize p, q into distributions
    sum1 = sum(dd1)
    sum2 = sum(dd2)

    p = [dd1[i] / sum1 for i in range(len(dd1))]
    q = [dd2[i] / sum2 for i in range(len(dd2))]
    m = [0.5 * (p[i] + q[i]) for i in range(len(p))]

    return 0.5 * relative_entropy(p, m) + 0.5 * relative_entropy(q, m)


# setup cmdline args
parser = argparse.ArgumentParser(description="Compares two DDSketch distributions")
parser.add_argument('--txn1', help='Transaction type for first file', required=True, type=str)
parser.add_argument('--txn2', help='Transaction type for second file', required=True, type=str)
parser.add_argument('--file1', help='Path to first ddsketch json', required=True, type=str)
parser.add_argument('--file2', help='Path to second ddsketch json', required=True, type=str)
parser.add_argument('--op', help='Operation name', type=str)
args = parser.parse_args()

f1 = open(args.file1)
f2 = open(args.file2)
data1 = json.load(f1)
data2 = json.load(f2)

if data1[args.txn1][args.op]["errorGuarantee"] != data2[args.txn2][args.op]["errorGuarantee"]:
    print("ERROR: The sketches have different error guarantees and cannot be compared!")
    exit()

b1 = data1[args.txn1][args.op]["buckets"]
b2 = data2[args.txn2][args.op]["buckets"]

re = relative_entropy_symmetric(b1, b2)
print("The similarity is: ", round(re, 8))
print("1 means least alike, 0 means most alike")
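relative_entropy_symmetric computes the Jensen-Shannon divergence JSD(P, Q) = 0.5 * KL(P || M) + 0.5 * KL(Q || M) with M = (P + Q) / 2; with base-2 logarithms the result lies in [0, 1], where 0 means identical distributions. A small sketch of the expected behavior, assuming the two functions above are factored into an importable module (the script runs argparse at import time) and using hypothetical bucket counts:

b1 = [0, 4, 10, 4, 0]
b2 = [0, 3, 11, 5, 1]
print(relative_entropy_symmetric(b1, b1))  # 0.0 -- a distribution is identical to itself
print(relative_entropy_symmetric(b1, b2))  # small positive value -- similar, but not identical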
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
#
# ddsketch_conversion.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import ddsketch_calc as dd


parser = argparse.ArgumentParser(description="Converts values to DDSketch buckets")
parser.add_argument('-e', '--error_guarantee', help='Error guarantee (default is 0.005)', required=False, type=float)
parser.add_argument('-v', '--value', help='Value', required=False, type=int)
parser.add_argument('-b', '--bucket', help='Bucket index', required=False, type=int)
args = parser.parse_args()

error = 0.005

if args.error_guarantee is not None:
    error = args.error_guarantee

sketch = dd.DDSketch(error)

if args.value is not None:
    print("Bucket index for ", args.value)
    print(sketch.getIndex(args.value))

if args.bucket is not None:
    print("Value for bucket ", args.bucket)
    print(sketch.getValue(args.bucket))
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
#
# export_graph.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import json
import matplotlib.pyplot as plt
import argparse
import ddsketch_calc as dd

# setup cmdline args
parser = argparse.ArgumentParser(description="Graphs DDSketch distribution")
parser.add_argument('-t', '--txn', help='Transaction type (ex: g8ui)', required=True, type=str)
parser.add_argument('--file', help='Path to ddsketch json', required=True, type=str)
parser.add_argument('--title', help='Title for the graph', required=False, type=str)
parser.add_argument('--savefig', help='Will save the plot to a file if set', type=str)
parser.add_argument('--op', help='Which OP to plot (casing matters)', type=str)
args = parser.parse_args()


# Opening JSON file
f = open(args.file)
data = json.load(f)

# parse json and init sketch
buckets = data[args.txn][args.op]["buckets"]
error = data[args.txn][args.op]["errorGuarantee"]
sketch = dd.DDSketch(error)

# trim the tails of the distribution
ls = [i for i, e in enumerate(buckets) if e != 0]
actual_data = buckets[ls[0]:ls[-1] + 1]
indices = range(ls[0], ls[-1] + 1)
actual_indices = [sketch.getValue(i) for i in indices]

# configure the x-axis to make more sense
fig, ax = plt.subplots()
ax.ticklabel_format(useOffset=False, style='plain')
plt.plot(actual_indices, actual_data)
plt.xlabel("Latency (in us)")
plt.ylabel("Frequency count")

plt_title = "Title"
if args.title is not None:
    plt_title = args.title
plt.title(plt_title)
plt.xlim([actual_indices[0], actual_indices[-1]])
if args.savefig is not None:
    plt.savefig(args.savefig, format='png')
else:
    plt.show()
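The tail-trimming step above drops leading and trailing empty buckets but keeps interior zeros. A standalone sketch of that slice logic on a hypothetical bucket list:

buckets = [0, 0, 3, 5, 0, 2, 0, 0]
ls = [i for i, e in enumerate(buckets) if e != 0]  # non-empty bucket indices: [2, 3, 5]
print(buckets[ls[0]:ls[-1] + 1])                   # [3, 5, 0, 2] -- interior zero retained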
@@ -0,0 +1,2 @@
add_library(libb64 STATIC cdecode.c cencode.c)
target_include_directories(libb64 PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")

@@ -5,7 +5,7 @@ This is part of the libb64 project, and has been placed in the public domain.
For details, see http://sourceforge.net/projects/libb64
*/

#include "fdbclient/libb64/cdecode.h"
#include "libb64/cdecode.h"

int base64_decode_value(char value_in) {
	static const char decoding[] = { 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1,

@@ -5,7 +5,7 @@ This is part of the libb64 project, and has been placed in the public domain.
For details, see http://sourceforge.net/projects/libb64
*/

#include "fdbclient/libb64/cencode.h"
#include "libb64/cencode.h"

const int CHARS_PER_LINE = 72;

@@ -9,7 +9,7 @@ For details, see http://sourceforge.net/projects/libb64
#define BASE64_DECODE_H

#include <iostream>
#include "fdbclient/libb64/encode.h"
#include "libb64/encode.h"

namespace base64 {
extern "C" {

@@ -0,0 +1,2 @@
add_library(md5 STATIC md5.c)
target_include_directories(md5 PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -194,7 +194,7 @@ class BaseInfo(object):
        if protocol_version >= PROTOCOL_VERSION_6_3:
            self.dc_id = bb.get_bytes_with_length()
        if protocol_version >= PROTOCOL_VERSION_7_1:
            if bb.get_bytes(1):
            if bb.get_bool():
                self.tenant = bb.get_bytes_with_length()

class GetVersionInfo(BaseInfo):
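The protocol-7.1 branch above reads the optional tenant as a boolean flag followed by a length-prefixed value. A hedged sketch of that framing; read_optional_bytes and the 4-byte little-endian length prefix are illustrative assumptions, not the actual ByteBuffer implementation:

import struct

def read_optional_bytes(buf, pos):
    # One-byte boolean prefix says whether a value follows at all.
    present = buf[pos] != 0
    pos += 1
    if not present:
        return None, pos
    # Assumed framing: 4-byte little-endian length, then the raw bytes.
    (length,) = struct.unpack_from("<I", buf, pos)
    pos += 4
    return bytes(buf[pos:pos + length]), pos + length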
@@ -523,7 +523,8 @@
                "duplicate_mutation_streams",
                "duplicate_mutation_fetch_timeout",
                "primary_dc_missing",
                "fetch_primary_dc_timeout"
                "fetch_primary_dc_timeout",
                "fetch_storage_wiggler_stats_timeout"
            ]
        },
        "issues":[
@@ -36,11 +36,21 @@

namespace fdb_cli {

// TODO: API version
const KeyRangeRef tenantMapSpecialKeyRange(LiteralStringRef("\xff\xff/management/tenant/map/"),
                                           LiteralStringRef("\xff\xff/management/tenant/map0"));
const KeyRangeRef tenantConfigSpecialKeyRange(LiteralStringRef("\xff\xff/management/tenant/configure/"),
                                              LiteralStringRef("\xff\xff/management/tenant/configure0"));
const KeyRangeRef tenantMapSpecialKeyRange720("\xff\xff/management/tenant/map/"_sr,
                                              "\xff\xff/management/tenant/map0"_sr);
const KeyRangeRef tenantConfigSpecialKeyRange("\xff\xff/management/tenant/configure/"_sr,
                                              "\xff\xff/management/tenant/configure0"_sr);

const KeyRangeRef tenantMapSpecialKeyRange710("\xff\xff/management/tenant_map/"_sr,
                                              "\xff\xff/management/tenant_map0"_sr);

KeyRangeRef const& tenantMapSpecialKeyRange(int apiVersion) {
	if (apiVersion >= 720) {
		return tenantMapSpecialKeyRange720;
	} else {
		return tenantMapSpecialKeyRange710;
	}
}

Optional<std::map<Standalone<StringRef>, Optional<Value>>>
parseTenantConfiguration(std::vector<StringRef> const& tokens, int startIndex, bool allowUnset) {

@@ -68,10 +78,16 @@ parseTenantConfiguration(std::vector<StringRef> const& tokens, int startIndex, b
			value = token;
		}

		if (configParams.count(param)) {
			fmt::print(
			    stderr, "ERROR: configuration parameter `{}' specified more than once.\n", param.toString().c_str());
			return {};
		}

		if (tokencmp(param, "tenant_group")) {
			configParams[param] = value;
		} else {
			fmt::print(stderr, "ERROR: unrecognized configuration parameter `{}'\n", param.toString().c_str());
			fmt::print(stderr, "ERROR: unrecognized configuration parameter `{}'.\n", param.toString().c_str());
			return {};
		}
	}

@@ -96,13 +112,13 @@ void applyConfigurationToSpecialKeys(Reference<ITransaction> tr,
}

// createtenant command
ACTOR Future<bool> createTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
ACTOR Future<bool> createTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens, int apiVersion) {
	if (tokens.size() < 2 || tokens.size() > 3) {
		printUsage(tokens[0]);
		return false;
	}

	state Key tenantNameKey = tenantMapSpecialKeyRange.begin.withSuffix(tokens[1]);
	state Key tenantNameKey = tenantMapSpecialKeyRange(apiVersion).begin.withSuffix(tokens[1]);
	state Reference<ITransaction> tr = db->createTransaction();
	state bool doneExistenceCheck = false;

@@ -113,6 +129,11 @@ ACTOR Future<bool> createTenantCommandActor(Reference<IDatabase> db, std::vector
		return false;
	}

	if (apiVersion < 720 && !configuration.get().empty()) {
		fmt::print(stderr, "ERROR: tenants do not accept configuration options before API version 720.\n");
		return false;
	}

	loop {
		try {
			tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
@@ -164,13 +185,13 @@ CommandFactory createTenantFactory(
    "that will require this tenant to be placed on the same cluster as other tenants in the same group."));

// deletetenant command
ACTOR Future<bool> deleteTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
ACTOR Future<bool> deleteTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens, int apiVersion) {
	if (tokens.size() != 2) {
		printUsage(tokens[0]);
		return false;
	}

	state Key tenantNameKey = tenantMapSpecialKeyRange.begin.withSuffix(tokens[1]);
	state Key tenantNameKey = tenantMapSpecialKeyRange(apiVersion).begin.withSuffix(tokens[1]);
	state Reference<ITransaction> tr = db->createTransaction();
	state bool doneExistenceCheck = false;

@@ -220,7 +241,7 @@ CommandFactory deleteTenantFactory(
    "Deletes a tenant from the cluster. Deletion will be allowed only if the specified tenant contains no data."));

// listtenants command
ACTOR Future<bool> listTenantsCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
ACTOR Future<bool> listTenantsCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens, int apiVersion) {
	if (tokens.size() > 4) {
		printUsage(tokens[0]);
		return false;

@@ -248,8 +269,8 @@ ACTOR Future<bool> listTenantsCommandActor(Reference<IDatabase> db, std::vector<
		}
	}

	state Key beginTenantKey = tenantMapSpecialKeyRange.begin.withSuffix(beginTenant);
	state Key endTenantKey = tenantMapSpecialKeyRange.begin.withSuffix(endTenant);
	state Key beginTenantKey = tenantMapSpecialKeyRange(apiVersion).begin.withSuffix(beginTenant);
	state Key endTenantKey = tenantMapSpecialKeyRange(apiVersion).begin.withSuffix(endTenant);
	state Reference<ITransaction> tr = db->createTransaction();

	loop {

@@ -269,7 +290,7 @@ ACTOR Future<bool> listTenantsCommandActor(Reference<IDatabase> db, std::vector<
			    tr->getRange(firstGreaterOrEqual(beginTenantKey), firstGreaterOrEqual(endTenantKey), limit);
			RangeResult tenants = wait(safeThreadFutureToFuture(kvsFuture));
			for (auto tenant : tenants) {
				tenantNames.push_back(tenant.key.removePrefix(tenantMapSpecialKeyRange.begin));
				tenantNames.push_back(tenant.key.removePrefix(tenantMapSpecialKeyRange(apiVersion).begin));
			}
		}

@@ -314,7 +335,7 @@ ACTOR Future<bool> getTenantCommandActor(Reference<IDatabase> db, std::vector<St
	}

	state bool useJson = tokens.size() == 3;
	state Key tenantNameKey = tenantMapSpecialKeyRange.begin.withSuffix(tokens[1]);
	state Key tenantNameKey = tenantMapSpecialKeyRange(apiVersion).begin.withSuffix(tokens[1]);
	state Reference<ITransaction> tr = db->createTransaction();

	loop {
@@ -1909,14 +1909,14 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
		}

		if (tokencmp(tokens[0], "createtenant")) {
			bool _result = wait(makeInterruptable(createTenantCommandActor(db, tokens)));
			bool _result = wait(makeInterruptable(createTenantCommandActor(db, tokens, opt.apiVersion)));
			if (!_result)
				is_error = true;
			continue;
		}

		if (tokencmp(tokens[0], "deletetenant")) {
			bool _result = wait(makeInterruptable(deleteTenantCommandActor(db, tokens)));
			bool _result = wait(makeInterruptable(deleteTenantCommandActor(db, tokens, opt.apiVersion)));
			if (!_result)
				is_error = true;
			else if (tenantName.present() && tokens[1] == tenantName.get()) {

@@ -1928,7 +1928,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
		}

		if (tokencmp(tokens[0], "listtenants")) {
			bool _result = wait(makeInterruptable(listTenantsCommandActor(db, tokens)));
			bool _result = wait(makeInterruptable(listTenantsCommandActor(db, tokens, opt.apiVersion)));
			if (!_result)
				is_error = true;
			continue;

@@ -1942,6 +1942,12 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
		}

		if (tokencmp(tokens[0], "configuretenant")) {
			if (opt.apiVersion < 720) {
				fmt::print(stderr, "ERROR: tenants cannot be configured before API version 720.\n");
				is_error = true;
				continue;
			}

			bool _result = wait(makeInterruptable(configureTenantCommandActor(db, tokens)));
			if (!_result)
				is_error = true;

@@ -1949,6 +1955,12 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
		}

		if (tokencmp(tokens[0], "renametenant")) {
			if (opt.apiVersion < 720) {
				fmt::print(stderr, "ERROR: tenants cannot be renamed before API version 720.\n");
				is_error = true;
				continue;
			}

			bool _result = wait(makeInterruptable(renameTenantCommandActor(db, tokens)));
			if (!_result)
				is_error = true;
@@ -166,11 +166,11 @@ ACTOR Future<bool> consistencyCheckCommandActor(Reference<ITransaction> tr,
// coordinators command
ACTOR Future<bool> coordinatorsCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// createtenant command
ACTOR Future<bool> createTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
ACTOR Future<bool> createTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens, int apiVersion);
// datadistribution command
ACTOR Future<bool> dataDistributionCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// deletetenant command
ACTOR Future<bool> deleteTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
ACTOR Future<bool> deleteTenantCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens, int apiVersion);
// exclude command
ACTOR Future<bool> excludeCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens, Future<Void> warn);
// expensive_data_check command
@@ -196,7 +196,7 @@ ACTOR Future<bool> killCommandActor(Reference<IDatabase> db,
                                    std::vector<StringRef> tokens,
                                    std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface);
// listtenants command
ACTOR Future<bool> listTenantsCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
ACTOR Future<bool> listTenantsCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens, int apiVersion);
// lock/unlock command
ACTOR Future<bool> lockCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
ACTOR Future<bool> unlockDatabaseActor(Reference<IDatabase> db, UID uid);

File diff suppressed because it is too large
@@ -1,8 +1,5 @@
fdb_find_sources(FDBCLIENT_SRCS)
list(APPEND FDBCLIENT_SRCS
  sha1/SHA1.cpp
  libb64/cdecode.c
  libb64/cencode.c)
list(APPEND FDBCLIENT_SRCS sha1/SHA1.cpp)

message(STATUS "FDB version is ${FDB_VERSION}")
message(STATUS "FDB package name is ${FDB_PACKAGE_NAME}")
@@ -209,12 +209,7 @@ void ClientKnobs::initialize(Randomize randomize) {

	init( IS_ACCEPTABLE_DELAY, 1.5 );

	init( HTTP_READ_SIZE, 128*1024 );
	init( HTTP_SEND_SIZE, 32*1024 );
	init( HTTP_VERBOSE_LEVEL, 0 );
	init( HTTP_REQUEST_ID_HEADER, "" );
	init( HTTP_REQUEST_AWS_V4_HEADER, true );
	init( HTTP_RESPONSE_SKIP_VERIFY_CHECKSUM_FOR_PARTIAL_CONTENT, false );
	init( BLOBSTORE_ENCRYPTION_TYPE, "" );
	init( BLOBSTORE_CONNECT_TRIES, 10 );
	init( BLOBSTORE_CONNECT_TIMEOUT, 10 );
@@ -3236,13 +3236,26 @@ TenantInfo TransactionState::getTenantInfo() {
	} else if (!t.present()) {
		return TenantInfo();
	} else if (cx->clientInfo->get().tenantMode == TenantMode::DISABLED && t.present()) {
		throw tenants_disabled();
		// If we are running provisional proxies, we allow a tenant request to go through since we don't know the tenant
		// mode. Such a transaction would not be allowed to commit without enabling provisional commits because either
		// the commit proxies will be provisional or the read version will be too old.
		if (!cx->clientInfo->get().grvProxies.empty() && !cx->clientInfo->get().grvProxies[0].provisional) {
			throw tenants_disabled();
		} else {
			ASSERT(!useProvisionalProxies);
		}
	}

	ASSERT(tenantId != TenantInfo::INVALID_TENANT);
	return TenantInfo(t.get(), tenantId);
}

// Returns the tenant used in this transaction. If the tenant is unset and raw access isn't specified, then the default
// tenant from DatabaseContext is applied to this transaction (note: the default tenant is typically unset, but in
// simulation could be something different).
//
// This function should not be called in the transaction constructor or in the setOption function to allow a user the
// opportunity to set raw access.
Optional<TenantName> const& TransactionState::tenant() {
	if (tenantSet) {
		return tenant_;

@@ -3255,6 +3268,9 @@ Optional<TenantName> const& TransactionState::tenant() {
	}
}

// Returns true if the tenant has been set, but does not cause default tenant resolution. This is useful in setOption
// (where we do not want to call tenant()) if we want to enforce that an option not be set on a Tenant transaction (e.g.
// for raw access).
bool TransactionState::hasTenant() const {
	return tenantSet && tenant_.present();
}
@@ -6572,6 +6588,11 @@ void Transaction::setOption(FDBTransactionOptions::Option option, Optional<Strin

	case FDBTransactionOptions::USE_PROVISIONAL_PROXIES:
		validateOptionValueNotPresent(value);
		if (trState->hasTenant()) {
			Error e = invalid_option();
			TraceEvent(SevWarn, "TenantTransactionUseProvisionalProxies").error(e).detail("Tenant", trState->tenant());
			throw e;
		}
		trState->options.getReadVersionFlags |= GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES;
		trState->useProvisionalProxies = UseProvisionalProxies::True;
		break;
@@ -9390,11 +9411,20 @@ Future<Void> DatabaseContext::getChangeFeedStream(Reference<ChangeFeedData> resu
	    Reference<DatabaseContext>::addRef(this), results, rangeID, begin, end, range, replyBufferSize, canReadPopped);
}

ACTOR Future<std::vector<OverlappingChangeFeedEntry>> singleLocationOverlappingChangeFeeds(
    Database cx,
    Reference<LocationInfo> location,
    KeyRangeRef range,
    Version minVersion) {
Version OverlappingChangeFeedsInfo::getFeedMetadataVersion(const KeyRangeRef& range) const {
	Version v = invalidVersion;
	for (auto& it : feedMetadataVersions) {
		if (it.second > v && it.first.intersects(range)) {
			v = it.second;
		}
	}
	return v;
}

ACTOR Future<OverlappingChangeFeedsReply> singleLocationOverlappingChangeFeeds(Database cx,
                                                                               Reference<LocationInfo> location,
                                                                               KeyRangeRef range,
                                                                               Version minVersion) {
	state OverlappingChangeFeedsRequest req;
	req.range = range;
	req.minVersion = minVersion;
|
|||
TaskPriority::DefaultPromiseEndpoint,
|
||||
AtMostOnce::False,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr));
|
||||
return rep.rangeIds;
|
||||
return rep;
|
||||
}
|
||||
|
||||
bool compareChangeFeedResult(const OverlappingChangeFeedEntry& i, const OverlappingChangeFeedEntry& j) {
|
||||
return i.rangeId < j.rangeId;
|
||||
return i.feedId < j.feedId;
|
||||
}
|
||||
|
||||
ACTOR Future<std::vector<OverlappingChangeFeedEntry>> getOverlappingChangeFeedsActor(Reference<DatabaseContext> db,
|
||||
KeyRangeRef range,
|
||||
Version minVersion) {
|
||||
ACTOR Future<OverlappingChangeFeedsInfo> getOverlappingChangeFeedsActor(Reference<DatabaseContext> db,
|
||||
KeyRangeRef range,
|
||||
Version minVersion) {
|
||||
state Database cx(db);
|
||||
state Span span("NAPI:GetOverlappingChangeFeeds"_loc);
|
||||
|
||||
|
@@ -9441,19 +9471,33 @@ ACTOR Future<std::vector<OverlappingChangeFeedEntry>> getOverlappingChangeFeedsA
				throw all_alternatives_failed();
			}

			state std::vector<Future<std::vector<OverlappingChangeFeedEntry>>> allOverlappingRequests;
			state std::vector<Future<OverlappingChangeFeedsReply>> allOverlappingRequests;
			for (auto& it : locations) {
				allOverlappingRequests.push_back(
				    singleLocationOverlappingChangeFeeds(cx, it.locations, it.range & range, minVersion));
			}
			wait(waitForAll(allOverlappingRequests));

			std::vector<OverlappingChangeFeedEntry> result;
			for (auto& it : allOverlappingRequests) {
				result.insert(result.end(), it.get().begin(), it.get().end());
			OverlappingChangeFeedsInfo result;
			std::unordered_map<KeyRef, OverlappingChangeFeedEntry> latestFeedMetadata;
			for (int i = 0; i < locations.size(); i++) {
				result.arena.dependsOn(allOverlappingRequests[i].get().arena);
				result.arena.dependsOn(locations[i].range.arena());
				result.feedMetadataVersions.push_back(
				    { locations[i].range, allOverlappingRequests[i].get().feedMetadataVersion });
				for (auto& it : allOverlappingRequests[i].get().feeds) {
					auto res = latestFeedMetadata.insert({ it.feedId, it });
					if (!res.second) {
						CODE_PROBE(true, "deduping fetched overlapping feed by higher metadata version");
						if (res.first->second.feedMetadataVersion < it.feedMetadataVersion) {
							res.first->second = it;
						}
					}
				}
			}
			for (auto& it : latestFeedMetadata) {
				result.feeds.push_back(result.arena, it.second);
			}
			std::sort(result.begin(), result.end(), compareChangeFeedResult);
			result.resize(std::unique(result.begin(), result.end()) - result.begin());
			return result;
		} catch (Error& e) {
			if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
@@ -9466,8 +9510,7 @@ ACTOR Future<std::vector<OverlappingChangeFeedEntry>> getOverlappingChangeFeedsA
	}
}

Future<std::vector<OverlappingChangeFeedEntry>> DatabaseContext::getOverlappingChangeFeeds(KeyRangeRef range,
                                                                                           Version minVersion) {
Future<OverlappingChangeFeedsInfo> DatabaseContext::getOverlappingChangeFeeds(KeyRangeRef range, Version minVersion) {
	return getOverlappingChangeFeedsActor(Reference<DatabaseContext>::addRef(this), range, minVersion);
}

@@ -9591,7 +9634,7 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
	state bool loadedTenantPrefix = false;

	// FIXME: implement force
	if (!force) {
	if (force) {
		throw unsupported_operation();
	}

@@ -20,7 +20,7 @@

#include "fdbclient/RESTClient.h"

#include "fdbclient/HTTP.h"
#include "fdbrpc/HTTP.h"
#include "flow/IRateControl.h"
#include "fdbclient/RESTUtils.h"
#include "flow/Arena.h"
@@ -20,8 +20,8 @@

#include "fdbclient/S3BlobStore.h"

#include "fdbclient/md5/md5.h"
#include "fdbclient/libb64/encode.h"
#include "md5/md5.h"
#include "libb64/encode.h"
#include "fdbclient/sha1/SHA1.h"
#include <time.h>
#include <iomanip>
@@ -701,8 +701,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
	init( FETCH_BLOCK_BYTES, 2e6 );
	init( FETCH_KEYS_PARALLELISM_BYTES, 4e6 ); if( randomize && BUGGIFY ) FETCH_KEYS_PARALLELISM_BYTES = 3e6;
	init( FETCH_KEYS_PARALLELISM, 2 );
	init( FETCH_KEYS_PARALLELISM_FULL, 10 );
	init( FETCH_KEYS_LOWER_PRIORITY, 0 );
	init( FETCH_CHANGEFEED_PARALLELISM, 2 );
	init( FETCH_CHANGEFEED_PARALLELISM, 4 );
	init( SERVE_FETCH_CHECKPOINT_PARALLELISM, 4 );
	init( BUGGIFY_BLOCK_BYTES, 10000 );
	init( STORAGE_RECOVERY_VERSION_LAG_LIMIT, 2 * MAX_READ_TRANSACTION_LIFE_VERSIONS );
@@ -907,11 +908,13 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
	// BlobGranuleVerify* simulation tests use "blobRangeKeys", BlobGranuleCorrectness* use "tenant", default in real clusters is "tenant"
	init( BG_RANGE_SOURCE, "tenant" );
	// BlobGranuleVerify* simulation tests use "knobs", BlobGranuleCorrectness* use "tenant", default in real clusters is "knobs"
	bool buggifyMediumGranules = simulationMediumShards || (randomize && BUGGIFY);
	init( BG_METADATA_SOURCE, "knobs" );
	init( BG_SNAPSHOT_FILE_TARGET_BYTES, 10000000 ); if( buggifySmallShards ) BG_SNAPSHOT_FILE_TARGET_BYTES = 100000; else if (simulationMediumShards || (randomize && BUGGIFY) ) BG_SNAPSHOT_FILE_TARGET_BYTES = 1000000;
	init( BG_SNAPSHOT_FILE_TARGET_CHUNKS, 100 ); if ( randomize && BUGGIFY ) BG_SNAPSHOT_FILE_TARGET_CHUNKS = 1 << deterministicRandom()->randomInt(0, 8);
	init( BG_SNAPSHOT_FILE_TARGET_BYTES, 10000000 ); if( buggifySmallShards ) BG_SNAPSHOT_FILE_TARGET_BYTES = 100000; else if (buggifyMediumGranules) BG_SNAPSHOT_FILE_TARGET_BYTES = 1000000;
	init( BG_SNAPSHOT_FILE_TARGET_CHUNK_BYTES, 64*1024 ); if ( randomize && BUGGIFY ) BG_SNAPSHOT_FILE_TARGET_CHUNK_BYTES = BG_SNAPSHOT_FILE_TARGET_BYTES / (1 << deterministicRandom()->randomInt(0, 8));
	init( BG_DELTA_BYTES_BEFORE_COMPACT, BG_SNAPSHOT_FILE_TARGET_BYTES/2 );
	init( BG_DELTA_FILE_TARGET_BYTES, BG_DELTA_BYTES_BEFORE_COMPACT/10 );
	init( BG_DELTA_FILE_TARGET_CHUNK_BYTES, 64*1024 ); if ( randomize && BUGGIFY ) BG_DELTA_FILE_TARGET_CHUNK_BYTES = BG_DELTA_FILE_TARGET_BYTES / (1 << deterministicRandom()->randomInt(0, 7));
	init( BG_MAX_SPLIT_FANOUT, 10 ); if( randomize && BUGGIFY ) BG_MAX_SPLIT_FANOUT = deterministicRandom()->randomInt(5, 15);
	init( BG_MAX_MERGE_FANIN, 10 ); if( randomize && BUGGIFY ) BG_MAX_MERGE_FANIN = deterministicRandom()->randomInt(2, 15);
	init( BG_HOT_SNAPSHOT_VERSIONS, 5000000 );
@@ -21,7 +21,7 @@
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/Tenant.h"
#include "fdbclient/libb64/encode.h"
#include "libb64/encode.h"
#include "flow/UnitTest.h"

Key TenantMapEntry::idToPrefix(int64_t id) {
@@ -118,6 +118,17 @@ std::string TenantMapEntry::toJson(int apiVersion) const {
		tenantEntry["tenant_group"] = tenantGroupObject;
	}

	if (tenantGroup.present()) {
		json_spirit::mObject tenantGroupObject;
		std::string encodedTenantGroup = base64::encoder::from_string(tenantGroup.get().toString());
		// Remove trailing newline
		encodedTenantGroup.resize(encodedTenantGroup.size() - 1);

		tenantGroupObject["base64"] = encodedTenantGroup;
		tenantGroupObject["printable"] = printable(tenantGroup.get());
		tenantEntry["tenant_group"] = tenantGroupObject;
	}

	return json_spirit::write_string(json_spirit::mValue(tenantEntry));
}

@@ -36,8 +36,8 @@
#include "flow/Net2Packet.h"
#include "flow/IRateControl.h"
#include "fdbclient/S3BlobStore.h"
#include "fdbclient/md5/md5.h"
#include "fdbclient/libb64/encode.h"
#include "md5/md5.h"
#include "libb64/encode.h"
#include "flow/actorcompiler.h" // This must be the last #include.

ACTOR template <typename T>
@@ -46,6 +46,7 @@ struct GranuleSnapshot : VectorRef<KeyValueRef> {
	}
};

// Deltas in version order
struct GranuleDeltas : VectorRef<MutationsAndVersionRef> {
	constexpr static FileIdentifier file_identifier = 8563013;

@@ -27,11 +27,15 @@
#include "flow/CompressionUtils.h"

Value serializeChunkedSnapshot(Standalone<GranuleSnapshot> snapshot,
                               int chunks,
                               int chunkSize,
                               Optional<CompressionFilter> compressFilter,
                               Optional<BlobGranuleCipherKeysCtx> cipherKeysCtx = Optional<BlobGranuleCipherKeysCtx>());
                               Optional<BlobGranuleCipherKeysCtx> cipherKeysCtx = {});

// FIXME: support sorted and chunked delta files
Value serializeChunkedDeltaFile(Standalone<GranuleDeltas> deltas,
                                const KeyRangeRef& fileRange,
                                int chunkSize,
                                Optional<CompressionFilter> compressFilter,
                                Optional<BlobGranuleCipherKeysCtx> cipherKeysCtx = {});

ErrorOr<RangeResult> loadAndMaterializeBlobGranules(const Standalone<VectorRef<BlobGranuleChunkRef>>& files,
                                                    const KeyRangeRef& keyRange,
@@ -220,12 +220,7 @@ public:
	int64_t CSI_SIZE_LIMIT;
	double CSI_STATUS_DELAY;

	int HTTP_SEND_SIZE;
	int HTTP_READ_SIZE;
	int HTTP_VERBOSE_LEVEL;
	std::string HTTP_REQUEST_ID_HEADER;
	bool HTTP_REQUEST_AWS_V4_HEADER; // setting this knob to true will enable AWS V4 style header.
	bool HTTP_RESPONSE_SKIP_VERIFY_CHECKSUM_FOR_PARTIAL_CONTENT; // skip verify md5 checksum for 206 response
	std::string BLOBSTORE_ENCRYPTION_TYPE;
	int BLOBSTORE_CONNECT_TRIES;
	int BLOBSTORE_CONNECT_TIMEOUT;
@@ -207,6 +207,16 @@ struct KeyRangeLocationInfo {
	  : tenantEntry(tenantEntry), range(range), locations(locations) {}
};

struct OverlappingChangeFeedsInfo {
	Arena arena;
	VectorRef<OverlappingChangeFeedEntry> feeds;
	// would prefer to use key range map but it complicates copy/move constructors
	std::vector<std::pair<KeyRangeRef, Version>> feedMetadataVersions;

	// for a feed that wasn't present, returns the metadata version it would have been fetched at.
	Version getFeedMetadataVersion(const KeyRangeRef& feedRange) const;
};

class DatabaseContext : public ReferenceCounted<DatabaseContext>, public FastAllocated<DatabaseContext>, NonCopyable {
public:
	static DatabaseContext* allocateOnForeignThread() {
@@ -361,7 +371,7 @@ public:
	                                   int replyBufferSize = -1,
	                                   bool canReadPopped = true);

	Future<std::vector<OverlappingChangeFeedEntry>> getOverlappingChangeFeeds(KeyRangeRef ranges, Version minVersion);
	Future<OverlappingChangeFeedsInfo> getOverlappingChangeFeeds(KeyRangeRef ranges, Version minVersion);
	Future<Void> popChangeFeedMutations(Key rangeID, Version version);

	Future<Key> purgeBlobGranules(KeyRange keyRange,
@@ -25,7 +25,7 @@
#pragma once

#include "fdbclient/JSONDoc.h"
#include "fdbclient/HTTP.h"
#include "fdbrpc/HTTP.h"
#include "fdbclient/RESTUtils.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
@@ -26,7 +26,7 @@
#include "flow/Net2Packet.h"
#include "fdbclient/Knobs.h"
#include "flow/IRateControl.h"
#include "fdbclient/HTTP.h"
#include "fdbrpc/HTTP.h"
#include "fdbclient/JSONDoc.h"

// Representation of all the things you need to connect to a blob store instance with some credentials.
@@ -659,6 +659,7 @@ public:
	int FETCH_BLOCK_BYTES;
	int FETCH_KEYS_PARALLELISM_BYTES;
	int FETCH_KEYS_PARALLELISM;
	int FETCH_KEYS_PARALLELISM_FULL;
	int FETCH_KEYS_LOWER_PRIORITY;
	int FETCH_CHANGEFEED_PARALLELISM;
	int SERVE_FETCH_CHECKPOINT_PARALLELISM;
@@ -887,8 +888,9 @@ public:
	std::string BG_METADATA_SOURCE;

	int BG_SNAPSHOT_FILE_TARGET_BYTES;
	int BG_SNAPSHOT_FILE_TARGET_CHUNKS;
	int BG_SNAPSHOT_FILE_TARGET_CHUNK_BYTES;
	int BG_DELTA_FILE_TARGET_BYTES;
	int BG_DELTA_FILE_TARGET_CHUNK_BYTES;
	int BG_DELTA_BYTES_BEFORE_COMPACT;
	int BG_MAX_SPLIT_FANOUT;
	int BG_MAX_MERGE_FANIN;
@@ -970,39 +970,51 @@ struct FetchCheckpointKeyValuesRequest {
};

struct OverlappingChangeFeedEntry {
	Key rangeId;
	KeyRange range;
	KeyRef feedId;
	KeyRangeRef range;
	Version emptyVersion;
	Version stopVersion;
	Version feedMetadataVersion;

	bool operator==(const OverlappingChangeFeedEntry& r) const {
		return rangeId == r.rangeId && range == r.range && emptyVersion == r.emptyVersion &&
		       stopVersion == r.stopVersion;
		return feedId == r.feedId && range == r.range && emptyVersion == r.emptyVersion &&
		       stopVersion == r.stopVersion && feedMetadataVersion == r.feedMetadataVersion;
	}

	OverlappingChangeFeedEntry() {}
	OverlappingChangeFeedEntry(Key const& rangeId, KeyRange const& range, Version emptyVersion, Version stopVersion)
	  : rangeId(rangeId), range(range), emptyVersion(emptyVersion), stopVersion(stopVersion) {}
	OverlappingChangeFeedEntry(KeyRef const& feedId,
	                           KeyRangeRef const& range,
	                           Version emptyVersion,
	                           Version stopVersion,
	                           Version feedMetadataVersion)
	  : feedId(feedId), range(range), emptyVersion(emptyVersion), stopVersion(stopVersion),
	    feedMetadataVersion(feedMetadataVersion) {}

	OverlappingChangeFeedEntry(Arena& arena, const OverlappingChangeFeedEntry& rhs)
	  : feedId(arena, rhs.feedId), range(arena, rhs.range), emptyVersion(rhs.emptyVersion),
	    stopVersion(rhs.stopVersion), feedMetadataVersion(rhs.feedMetadataVersion) {}

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, rangeId, range, emptyVersion, stopVersion);
		serializer(ar, feedId, range, emptyVersion, stopVersion, feedMetadataVersion);
	}
};

struct OverlappingChangeFeedsReply {
	constexpr static FileIdentifier file_identifier = 11815134;
	std::vector<OverlappingChangeFeedEntry> rangeIds;
	VectorRef<OverlappingChangeFeedEntry> feeds;
	bool cached;
	Arena arena;
	Version feedMetadataVersion;

	OverlappingChangeFeedsReply() : cached(false) {}
	explicit OverlappingChangeFeedsReply(std::vector<OverlappingChangeFeedEntry> const& rangeIds)
	  : rangeIds(rangeIds), cached(false) {}
	OverlappingChangeFeedsReply() : cached(false), feedMetadataVersion(invalidVersion) {}
	explicit OverlappingChangeFeedsReply(VectorRef<OverlappingChangeFeedEntry> const& feeds,
	                                     Version feedMetadataVersion)
	  : feeds(feeds), cached(false), feedMetadataVersion(feedMetadataVersion) {}

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, rangeIds, arena);
		serializer(ar, feeds, arena, feedMetadataVersion);
	}
};

@@ -80,13 +80,9 @@ struct TenantMapEntry {
	bool matchesConfiguration(TenantMapEntry const& other) const;
	void configure(Standalone<StringRef> parameter, Optional<Value> value);

	Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion(ProtocolVersion::withTenants())); }

	Value encode() const { return ObjectWriter::toValue(*this, IncludeVersion()); }
	static TenantMapEntry decode(ValueRef const& value) {
		TenantMapEntry entry;
		ObjectReader reader(value.begin(), IncludeVersion());
		reader.deserialize(entry);
		return entry;
		return ObjectReader::fromStringRef<TenantMapEntry>(value, IncludeVersion());
	}

	template <class Ar>
@@ -109,7 +105,7 @@ struct TenantGroupEntry {
	TenantGroupEntry() = default;
	TenantGroupEntry(Optional<ClusterName> assignedCluster) : assignedCluster(assignedCluster) {}

	Value encode() { return ObjectWriter::toValue(*this, IncludeVersion(ProtocolVersion::withTenants())); }
	Value encode() { return ObjectWriter::toValue(*this, IncludeVersion()); }
	static TenantGroupEntry decode(ValueRef const& value) {
		TenantGroupEntry entry;
		ObjectReader reader(value.begin(), IncludeVersion());
@@ -153,14 +149,12 @@ struct TenantMetadataSpecification {
	KeyBackedObjectMap<TenantGroupName, TenantGroupEntry, decltype(IncludeVersion()), NullCodec> tenantGroupMap;

	TenantMetadataSpecification(KeyRef subspace)
	  : tenantMap(subspace.withSuffix("tenant/map/"_sr), IncludeVersion(ProtocolVersion::withTenants())),
	  : tenantMap(subspace.withSuffix("tenant/map/"_sr), IncludeVersion()),
	    lastTenantId(subspace.withSuffix("tenant/lastId"_sr)),
	    tenantTombstones(subspace.withSuffix("tenant/tombstones/"_sr)),
	    tombstoneCleanupData(subspace.withSuffix("tenant/tombstoneCleanup"_sr),
	                         IncludeVersion(ProtocolVersion::withTenants())),
	    tombstoneCleanupData(subspace.withSuffix("tenant/tombstoneCleanup"_sr), IncludeVersion()),
	    tenantGroupTenantIndex(subspace.withSuffix("tenant/tenantGroup/tenantIndex/"_sr)),
	    tenantGroupMap(subspace.withSuffix("tenant/tenantGroup/map/"_sr),
	                   IncludeVersion(ProtocolVersion::withTenants())) {}
	    tenantGroupMap(subspace.withSuffix("tenant/tenantGroup/map/"_sr), IncludeVersion()) {}
};

struct TenantMetadata {
@@ -61,10 +61,10 @@ if(${COROUTINE_IMPL} STREQUAL libcoro)
endif()

target_include_directories(fdbrpc PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_BINARY_DIR}/include" PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libeio)
target_link_libraries(fdbrpc PUBLIC flow PRIVATE rapidjson)
target_link_libraries(fdbrpc PUBLIC flow libb64 md5 PRIVATE rapidjson)

target_include_directories(fdbrpc_sampling PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_BINARY_DIR}/include" PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libeio)
target_link_libraries(fdbrpc_sampling PUBLIC flow_sampling PRIVATE rapidjson)
target_link_libraries(fdbrpc_sampling PUBLIC flow_sampling libb64 md5 PRIVATE rapidjson)

if(${COROUTINE_IMPL} STREQUAL libcoro)
  target_link_libraries(fdbrpc PUBLIC coro)
@@ -18,12 +18,11 @@
 * limitations under the License.
 */

#include "fdbclient/HTTP.h"
#include "fdbrpc/HTTP.h"

#include "fdbclient/md5/md5.h"
#include "fdbclient/ClientKnobs.h"
#include "fdbclient/libb64/encode.h"
#include "fdbclient/Knobs.h"
#include "md5/md5.h"
#include "libb64/encode.h"
#include "flow/Knobs.h"
#include <cctype>

#include "flow/actorcompiler.h" // has to be last include
@@ -153,7 +152,7 @@ ACTOR Future<size_t> read_delimited_into_string(Reference<IConnection> conn,
		// Next search will start at the current end of the buffer - delim size + 1
		if (sPos >= lookBack)
			sPos -= lookBack;
		wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE)));
		wait(success(read_into_string(conn, buf, FLOW_KNOBS->HTTP_READ_SIZE)));
	}
}

@@ -161,7 +160,7 @@ ACTOR Future<size_t> read_delimited_into_string(Reference<IConnection> conn,
ACTOR Future<Void> read_fixed_into_string(Reference<IConnection> conn, int len, std::string* buf, size_t pos) {
	state int stop_size = pos + len;
	while (buf->size() < stop_size)
		wait(success(read_into_string(conn, buf, CLIENT_KNOBS->HTTP_READ_SIZE)));
		wait(success(read_into_string(conn, buf, FLOW_KNOBS->HTTP_READ_SIZE)));
	return Void();
}

@@ -329,7 +328,7 @@ ACTOR Future<Void> read_http_response(Reference<HTTP::Response> r, Reference<ICo

	// If there is actual response content, check the MD5 sum against the Content-MD5 response header
	if (r->content.size() > 0) {
		if (r->code == 206 && CLIENT_KNOBS->HTTP_RESPONSE_SKIP_VERIFY_CHECKSUM_FOR_PARTIAL_CONTENT) {
		if (r->code == 206 && FLOW_KNOBS->HTTP_RESPONSE_SKIP_VERIFY_CHECKSUM_FOR_PARTIAL_CONTENT) {
			return Void();
		}

@@ -368,7 +367,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
	// There is no standard http request id header field, so either a global default can be set via a knob
	// or it can be set per-request with the requestIDHeader argument (which overrides the default)
	if (requestIDHeader.empty()) {
		requestIDHeader = CLIENT_KNOBS->HTTP_REQUEST_ID_HEADER;
		requestIDHeader = FLOW_KNOBS->HTTP_REQUEST_ID_HEADER;
	}

	state bool earlyResponse = false;
@@ -400,13 +399,13 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
	// Prepend headers to content packer buffer chain
	pContent->prependWriteBuffer(pFirst, pLast);

	if (CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 1)
	if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 1)
		printf("[%s] HTTP starting %s %s ContentLen:%d\n",
		       conn->getDebugID().toString().c_str(),
		       verb.c_str(),
		       resource.c_str(),
		       contentLen);
	if (CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
	if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
		for (auto h : headers)
			printf("Request Header: %s: %s\n", h.first.c_str(), h.second.c_str());
	}
@@ -427,7 +426,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
			break;
		}

		state int trySend = CLIENT_KNOBS->HTTP_SEND_SIZE;
		state int trySend = FLOW_KNOBS->HTTP_SEND_SIZE;
		wait(sendRate->getAllowance(trySend));
		int len = conn->write(pContent->getUnsent(), trySend);
		if (pSent != nullptr)
@ -481,7 +480,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
|
|||
}
|
||||
}
|
||||
|
||||
if (CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) {
|
||||
if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 0) {
|
||||
printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %d]\n",
|
||||
conn->getDebugID().toString().c_str(),
|
||||
(err.present() ? format("*ERROR*=%s ", err.get().name()).c_str() : ""),
|
||||
|
@ -494,7 +493,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
|
|||
total_sent,
|
||||
(int)r->contentLen);
|
||||
}
|
||||
if (CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
|
||||
if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
|
||||
printf("[%s] HTTP RESPONSE: %s %s\n%s\n",
|
||||
conn->getDebugID().toString().c_str(),
|
||||
verb.c_str(),
|
||||
|
@ -510,7 +509,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
|
|||
} catch (Error& e) {
|
||||
double elapsed = timer() - send_start;
|
||||
// A bad_request_id error would have already been logged in verbose mode before err is thrown above.
|
||||
if (CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0 && e.code() != error_code_http_bad_request_id) {
|
||||
if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 0 && e.code() != error_code_http_bad_request_id) {
|
||||
printf("[%s] HTTP *ERROR*=%s early=%d, time=%fs %s %s contentLen=%d [%d out]\n",
|
||||
conn->getDebugID().toString().c_str(),
|
||||
e.name(),
|
|
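Every knob referenced in this file moves from CLIENT_KNOBS to FLOW_KNOBS, matching the include swap at the top of the file (fdbclient/Knobs.h replaced by flow/Knobs.h): HTTP is being pushed below the client layer, so its tunables have to live in the flow layer too. A minimal sketch of what the receiving side of such a migration looks like — the member names are the ones used above, but the values shown are illustrative placeholders, not the shipped defaults:

// flow/Knobs.h (sketch)
class FlowKnobs {
public:
	int HTTP_SEND_SIZE;
	int HTTP_READ_SIZE;
	int HTTP_VERBOSE_LEVEL;
	std::string HTTP_REQUEST_ID_HEADER;
	bool HTTP_RESPONSE_SKIP_VERIFY_CHECKSUM_FOR_PARTIAL_CONTENT;
};

// flow/Knobs.cpp (sketch; illustrative values only)
init( HTTP_SEND_SIZE,    32 * 1024 );
init( HTTP_READ_SIZE,    64 * 1024 );
init( HTTP_VERBOSE_LEVEL,        0 );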
@ -48,6 +48,7 @@ public:
int write(SendBuffer const* buffer, int limit) override;
NetworkAddress getPeerAddress() const override;
UID getDebugID() const override;
boost::asio::ip::tcp::socket& getSocket() override { return socket; }
static Future<std::vector<NetworkAddress>> resolveTCPEndpoint(const std::string& host,
const std::string& service,
DNSCache* dnsCache);
@ -23,6 +23,7 @@
#include "flow/ProtocolVersion.h"
#include <algorithm>
#include <string>
#include <limits>
#pragma once

#include "flow/flow.h"

@ -488,6 +489,8 @@ public:
bool setDiffProtocol; // true if a process with a different protocol version has been started

bool allowStorageMigrationTypeChange = false;
double injectTargetedSSRestartTime = std::numeric_limits<double>::max();
double injectSSDelayTime = std::numeric_limits<double>::max();

flowGlobalType global(int id) const final { return getCurrentProcess()->global(id); };
void setGlobal(size_t id, flowGlobalType v) final { getCurrentProcess()->setGlobal(id, v); };
@ -324,6 +324,8 @@ struct Sim2Conn final : IConnection, ReferenceCounted<Sim2Conn> {
NetworkAddress getPeerAddress() const override { return peerEndpoint; }
UID getDebugID() const override { return dbgid; }

boost::asio::ip::tcp::socket& getSocket() override { throw operation_failed(); }

bool opened, closedByCaller, stableConnection;

private:

@ -948,8 +950,9 @@ public:
TaskPriority getCurrentTask() const override { return currentTaskID; }
void setCurrentTask(TaskPriority taskID) override { currentTaskID = taskID; }
// Sets the taskID/priority of the current task, without yielding
Future<Reference<IConnection>> connect(NetworkAddress toAddr, const std::string& host) override {
ASSERT(host.empty());
Future<Reference<IConnection>> connect(NetworkAddress toAddr,
boost::asio::ip::tcp::socket* existingSocket = nullptr) override {
ASSERT(existingSocket == nullptr);
if (!addressMap.count(toAddr)) {
return waitForProcessAndConnect(toAddr, this);
}

@ -975,7 +978,7 @@ public:
return onConnect(::delay(0.5 * deterministicRandom()->random01()), myc);
}

Future<Reference<IConnection>> connectExternal(NetworkAddress toAddr, const std::string& host) override {
Future<Reference<IConnection>> connectExternal(NetworkAddress toAddr) override {
return SimExternalConnection::connect(toAddr);
}
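Both sim2 overrides above track an interface change: hostname handling moves out of connect(), and a caller may now donate an already-connected TCP socket for reuse (simulation asserts it never receives one, and getSocket() on a Sim2Conn simply throws). A simplified sketch of the interface shape these overrides implement — treat this as an assumption about the declaration, which lives outside this diff:

class INetworkConnections {
public:
	// Passing nullptr keeps the old "dial toAddr" behavior; a non-null socket is adopted.
	virtual Future<Reference<IConnection>> connect(NetworkAddress toAddr,
	                                               boost::asio::ip::tcp::socket* existingSocket = nullptr) = 0;
	virtual Future<Reference<IConnection>> connectExternal(NetworkAddress toAddr) = 0;
};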
@ -143,30 +143,34 @@ bool compareFDBAndBlob(RangeResult fdb,
}
}

printf("Chunks:\n");
for (auto& chunk : blob.second) {
printf("[%s - %s)\n", chunk.keyRange.begin.printable().c_str(), chunk.keyRange.end.printable().c_str());

printf(" SnapshotFile:\n %s\n",
chunk.snapshotFile.present() ? chunk.snapshotFile.get().toString().c_str() : "<none>");
printf(" DeltaFiles:\n");
for (auto& df : chunk.deltaFiles) {
printf(" %s\n", df.toString().c_str());
}
printf(" Deltas: (%d)", chunk.newDeltas.size());
if (chunk.newDeltas.size() > 0) {
fmt::print(" with version [{0} - {1}]",
chunk.newDeltas[0].version,
chunk.newDeltas[chunk.newDeltas.size() - 1].version);
}
fmt::print(" IncludedVersion: {}\n", chunk.includedVersion);
}
printf("\n");
printGranuleChunks(blob.second);
}
}
return correct;
}

void printGranuleChunks(const Standalone<VectorRef<BlobGranuleChunkRef>>& chunks) {
printf("Chunks:\n");
for (auto& chunk : chunks) {
printf("[%s - %s)\n", chunk.keyRange.begin.printable().c_str(), chunk.keyRange.end.printable().c_str());

printf(" SnapshotFile:\n %s\n",
chunk.snapshotFile.present() ? chunk.snapshotFile.get().toString().c_str() : "<none>");
printf(" DeltaFiles:\n");
for (auto& df : chunk.deltaFiles) {
printf(" %s\n", df.toString().c_str());
}
printf(" Deltas: (%d)", chunk.newDeltas.size());
if (chunk.newDeltas.size() > 0) {
fmt::print(" with version [{0} - {1}]",
chunk.newDeltas[0].version,
chunk.newDeltas[chunk.newDeltas.size() - 1].version);
}
fmt::print(" IncludedVersion: {}\n", chunk.includedVersion);
}
printf("\n");
}

ACTOR Future<Void> clearAndAwaitMerge(Database cx, KeyRange range) {
// clear key range and check whether it is merged or not, repeatedly
state Transaction tr(cx);
@ -52,6 +52,7 @@
*/

#define BM_DEBUG false
#define BM_PURGE_DEBUG false

void handleClientBlobRange(KeyRangeMap<bool>* knownBlobRanges,
Arena& ar,

@ -1652,7 +1653,9 @@ ACTOR Future<Void> persistMergeGranulesDone(Reference<BlobManagerData> bmData,
state Key lockKey = blobGranuleLockKeyFor(parentRange);
state Future<Optional<Value>> oldLockFuture = tr->get(lockKey);

wait(updateChangeFeed(tr,
// This has to be
// TODO: fix this better! (privatize change feed key clear)
wait(updateChangeFeed(&tr->getTransaction(),
granuleIDToCFKey(parentGranuleIDs[parentIdx]),
ChangeFeedStatus::CHANGE_FEED_DESTROY,
parentRange));

@ -3171,8 +3174,8 @@ ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self,
Key historyKey,
Version purgeVersion,
KeyRange granuleRange) {
if (BM_DEBUG) {
fmt::print("Fully deleting granule {0}: init\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Fully deleting granule {1}: init\n", self->epoch, granuleId.toString());
}

// if granule is still splitting and files are needed for new sub-granules to re-snapshot, we can only partially

@ -3198,8 +3201,11 @@ ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self,
filesToDelete.emplace_back(fname);
}

if (BM_DEBUG) {
fmt::print("Fully deleting granule {0}: deleting {1} files\n", granuleId.toString(), filesToDelete.size());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Fully deleting granule {1}: deleting {2} files\n",
self->epoch,
granuleId.toString(),
filesToDelete.size());
for (auto filename : filesToDelete) {
fmt::print(" - {}\n", filename.c_str());
}

@ -3212,8 +3218,9 @@ ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self,
wait(waitForAll(deletions));

// delete metadata in FDB (history entry and file keys)
if (BM_DEBUG) {
fmt::print("Fully deleting granule {0}: deleting history and file keys\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print(
"BM {0} Fully deleting granule {1}: deleting history and file keys\n", self->epoch, granuleId.toString());
}

state Transaction tr(self->db);

@ -3232,8 +3239,8 @@ ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self,
}
}

if (BM_DEBUG) {
fmt::print("Fully deleting granule {0}: success\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Fully deleting granule {1}: success\n", self->epoch, granuleId.toString());
}

TraceEvent("GranuleFullPurge", self->id)

@ -3245,6 +3252,8 @@ ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self,
++self->stats.granulesFullyPurged;
self->stats.filesPurged += filesToDelete.size();

CODE_PROBE(true, "full granule purged");

return Void();
}

@ -3260,8 +3269,8 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self,
UID granuleId,
Version purgeVersion,
KeyRange granuleRange) {
if (BM_DEBUG) {
fmt::print("Partially deleting granule {0}: init\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Partially deleting granule {1}: init\n", self->epoch, granuleId.toString());
}

state Reference<BlobConnectionProvider> bstore = wait(getBStoreForGranule(self, granuleRange));

@ -3310,8 +3319,11 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self,
filesToDelete.emplace_back(fname);
}

if (BM_DEBUG) {
fmt::print("Partially deleting granule {0}: deleting {1} files\n", granuleId.toString(), filesToDelete.size());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Partially deleting granule {1}: deleting {2} files\n",
self->epoch,
granuleId.toString(),
filesToDelete.size());
for (auto filename : filesToDelete) {
fmt::print(" - {0}\n", filename);
}

@ -3328,8 +3340,8 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self,
wait(waitForAll(deletions));

// delete metadata in FDB (deleted file keys)
if (BM_DEBUG) {
fmt::print("Partially deleting granule {0}: deleting file keys\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Partially deleting granule {1}: deleting file keys\n", self->epoch, granuleId.toString());
}

state Transaction tr(self->db);

@ -3348,8 +3360,8 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self,
}
}

if (BM_DEBUG) {
fmt::print("Partially deleting granule {0}: success\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Partially deleting granule {1}: success\n", self->epoch, granuleId.toString());
}
TraceEvent("GranulePartialPurge", self->id)
.detail("Epoch", self->epoch)

@ -3360,6 +3372,8 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self,
++self->stats.granulesPartiallyPurged;
self->stats.filesPurged += filesToDelete.size();

CODE_PROBE(true, " partial granule purged");

return Void();
}

@ -3372,8 +3386,9 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self,
* processing this purge intent.
*/
ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range, Version purgeVersion, bool force) {
if (BM_DEBUG) {
fmt::print("purgeRange starting for range [{0} - {1}) @ purgeVersion={2}, force={3}\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} purgeRange starting for range [{1} - {2}) @ purgeVersion={3}, force={4}\n",
self->epoch,
range.begin.printable(),
range.end.printable(),
purgeVersion,

@ -3395,8 +3410,7 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range

// track which granules we have already added to traversal
// note: (startKey, startVersion) uniquely identifies a granule
state std::unordered_set<std::pair<const uint8_t*, Version>, boost::hash<std::pair<const uint8_t*, Version>>>
visited;
state std::unordered_set<std::pair<std::string, Version>, boost::hash<std::pair<std::string, Version>>> visited;

// find all active granules (that comprise the range) and add to the queue
state KeyRangeMap<UID>::Ranges activeRanges = self->workerAssignments.intersectingRanges(range);
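The visited set's key type changes from (const uint8_t*, Version) to (std::string, Version). Hashing the raw begin-key pointer only deduplicates two ranges when they happen to share the same arena-backed buffer, not when their key bytes are equal, so the traversal could revisit granules. A standalone sketch of the difference, using a hypothetical key and plain C++ (compiles on its own):

#include <boost/functional/hash.hpp>
#include <cstdint>
#include <string>
#include <unordered_set>
#include <utility>

using Version = int64_t;

int main() {
	// Two buffers holding identical bytes, as two arenas holding the same granule begin key would.
	std::string a = "granuleBegin", b = "granuleBegin";

	// Pointer-keyed set (old behavior): the duplicate is NOT detected.
	std::unordered_set<std::pair<const char*, Version>, boost::hash<std::pair<const char*, Version>>> byPtr;
	byPtr.insert({ a.data(), 1 });
	bool ptrDup = !byPtr.insert({ b.data(), 1 }).second; // false: distinct pointers hash differently

	// Content-keyed set (new behavior): the duplicate IS detected.
	std::unordered_set<std::pair<std::string, Version>, boost::hash<std::pair<std::string, Version>>> byStr;
	byStr.insert({ a, 1 });
	bool strDup = !byStr.insert({ b, 1 }).second; // true: equal key contents collide as intended

	return (!ptrDup && strDup) ? 0 : 1;
}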
@ -3407,8 +3421,9 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range

state KeyRangeMap<UID>::iterator activeRange;
for (activeRange = activeRanges.begin(); activeRange != activeRanges.end(); ++activeRange) {
if (BM_DEBUG) {
fmt::print("Checking if active range [{0} - {1}), owned by BW {2}, should be purged\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Checking if active range [{1} - {2}), owned by BW {3}, should be purged\n",
self->epoch,
activeRange.begin().printable(),
activeRange.end().printable(),
activeRange.value().toString());

@ -3416,6 +3431,10 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range

// assumption: purge boundaries must respect granule boundaries
if (activeRange.begin() < range.begin || activeRange.end() > range.end) {
TraceEvent(SevWarn, "GranulePurgeRangesUnaligned", self->id)
.detail("Epoch", self->epoch)
.detail("PurgeRange", range)
.detail("GranuleRange", activeRange.range());
continue;
}

@ -3425,20 +3444,29 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range

loop {
try {
if (BM_DEBUG) {
fmt::print("Fetching latest history entry for range [{0} - {1})\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Fetching latest history entry for range [{1} - {2})\n",
self->epoch,
activeRange.begin().printable(),
activeRange.end().printable());
}
// FIXME: doing this serially will likely be too slow for large purges
Optional<GranuleHistory> history = wait(getLatestGranuleHistory(&tr, activeRange.range()));
// TODO: can we tell from the krm that this range is not valid, so that we don't need to do a
// get
if (history.present()) {
if (BM_DEBUG) {
printf("Adding range to history queue\n");
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Adding range to history queue: [{1} - {2}) @ {3} ({4})\n",
self->epoch,
activeRange.begin().printable(),
activeRange.end().printable(),
history.get().version,
(void*)(activeRange.range().begin.begin()));
}
visited.insert({ activeRange.range().begin.begin(), history.get().version });
visited.insert({ activeRange.range().begin.toString(), history.get().version });
historyEntryQueue.push({ activeRange.range(), history.get().version, MAX_VERSION });
} else if (BM_PURGE_DEBUG) {
fmt::print("BM {0} No history for range, ignoring\n", self->epoch);
}
break;
} catch (Error& e) {

@ -3447,8 +3475,12 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
}
}

if (BM_DEBUG) {
printf("Beginning BFS traversal of history\n");
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Beginning BFS traversal of {1} history items for range [{2} - {3}) \n",
self->epoch,
historyEntryQueue.size(),
range.begin.printable(),
range.end.printable());
}
while (!historyEntryQueue.empty()) {
// process the node at the front of the queue and remove it

@ -3458,8 +3490,9 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
std::tie(currRange, startVersion, endVersion) = historyEntryQueue.front();
historyEntryQueue.pop();

if (BM_DEBUG) {
fmt::print("Processing history node [{0} - {1}) with versions [{2}, {3})\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Processing history node [{1} - {2}) with versions [{3}, {4})\n",
self->epoch,
currRange.begin.printable(),
currRange.end.printable(),
startVersion,

@ -3484,11 +3517,15 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
}

if (!foundHistory) {
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} No history for this node, skipping\n", self->epoch);
}
continue;
}

if (BM_DEBUG) {
fmt::print("Found history entry for this node. It's granuleID is {0}\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Found history entry for this node. It's granuleID is {1}\n",
self->epoch,
currHistoryNode.granuleID.toString());
}

@ -3499,33 +3536,45 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
// and so this granule should be partially deleted
// - otherwise, this granule is active, so don't schedule it for deletion
if (force || endVersion <= purgeVersion) {
if (BM_DEBUG) {
fmt::print("Granule {0} will be FULLY deleted\n", currHistoryNode.granuleID.toString());
if (BM_PURGE_DEBUG) {
fmt::print(
"BM {0} Granule {1} will be FULLY deleted\n", self->epoch, currHistoryNode.granuleID.toString());
}
toFullyDelete.push_back({ currHistoryNode.granuleID, historyKey, currRange });
} else if (startVersion < purgeVersion) {
if (BM_DEBUG) {
fmt::print("Granule {0} will be partially deleted\n", currHistoryNode.granuleID.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Granule {1} will be partially deleted\n",
self->epoch,
currHistoryNode.granuleID.toString());
}
toPartiallyDelete.push_back({ currHistoryNode.granuleID, currRange });
}

// add all of the node's parents to the queue
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Checking {1} parents\n", self->epoch, currHistoryNode.parentVersions.size());
}
for (int i = 0; i < currHistoryNode.parentVersions.size(); i++) {
// for (auto& parent : currHistoryNode.parentVersions.size()) {
// if we already added this node to queue, skip it; otherwise, mark it as visited
KeyRangeRef parentRange(currHistoryNode.parentBoundaries[i], currHistoryNode.parentBoundaries[i + 1]);
Version parentVersion = currHistoryNode.parentVersions[i];
if (visited.count({ parentRange.begin.begin(), parentVersion })) {
if (BM_DEBUG) {
fmt::print("Already added {0} to queue, so skipping it\n", currHistoryNode.granuleID.toString());
std::string beginStr = parentRange.begin.toString();
if (!visited.insert({ beginStr, parentVersion }).second) {
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Already added [{1} - {2}) @ {3} - {4} to queue, so skipping it\n",
self->epoch,
parentRange.begin.printable(),
parentRange.end.printable(),
parentVersion,
startVersion);
}
continue;
}
visited.insert({ parentRange.begin.begin(), parentVersion });

if (BM_DEBUG) {
fmt::print("Adding parent [{0} - {1}) with versions [{2} - {3}) to queue\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Adding parent [{1} - {2}) @ {3} - {4} to queue\n",
self->epoch,
parentRange.begin.printable(),
parentRange.end.printable(),
parentVersion,

@ -3553,10 +3602,19 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
// we won't run into any issues with trying to "re-delete" a blob file since deleting
// a file that doesn't exist is considered successful

TraceEvent("PurgeGranulesTraversalComplete", self->id)
.detail("Epoch", self->epoch)
.detail("Range", range)
.detail("PurgeVersion", purgeVersion)
.detail("Force", force)
.detail("VisitedCount", visited.size())
.detail("DeletingFullyCount", toFullyDelete.size())
.detail("DeletingPartiallyCount", toPartiallyDelete.size());

state std::vector<Future<Void>> partialDeletions;
state int i;
if (BM_DEBUG) {
fmt::print("{0} granules to fully delete\n", toFullyDelete.size());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0}: {1} granules to fully delete\n", self->epoch, toFullyDelete.size());
}
for (i = toFullyDelete.size() - 1; i >= 0; --i) {
state UID granuleId;

@ -3564,22 +3622,22 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
KeyRange keyRange;
std::tie(granuleId, historyKey, keyRange) = toFullyDelete[i];
// FIXME: consider batching into a single txn (need to take care of txn size limit)
if (BM_DEBUG) {
fmt::print("About to fully delete granule {0}\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0}: About to fully delete granule {1}\n", self->epoch, granuleId.toString());
}
wait(fullyDeleteGranule(self, granuleId, historyKey, purgeVersion, range));
}

if (BM_DEBUG) {
fmt::print("{0} granules to partially delete\n", toPartiallyDelete.size());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0}: {1} granules to partially delete\n", self->epoch, toPartiallyDelete.size());
}

for (i = toPartiallyDelete.size() - 1; i >= 0; --i) {
UID granuleId;
KeyRange range;
std::tie(granuleId, range) = toPartiallyDelete[i];
if (BM_DEBUG) {
fmt::print("About to partially delete granule {0}\n", granuleId.toString());
if (BM_PURGE_DEBUG) {
fmt::print("BM {0}: About to partially delete granule {1}\n", self->epoch, granuleId.toString());
}
partialDeletions.emplace_back(partiallyDeleteGranule(self, granuleId, purgeVersion, range));
}

@ -3591,8 +3649,9 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
// another purgeIntent that got written for this table while we were processing this one.
// If that is the case, we should not clear the key. Otherwise, we can just clear the key.

if (BM_DEBUG) {
fmt::print("Successfully purged range [{0} - {1}) at purgeVersion={2}\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0}: Successfully purged range [{1} - {2}) at purgeVersion={3}\n",
self->epoch,
range.begin.printable(),
range.end.printable(),
purgeVersion);

@ -3604,6 +3663,8 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
.detail("PurgeVersion", purgeVersion)
.detail("Force", force);

CODE_PROBE(true, "range purge complete");

++self->stats.purgesProcessed;
return Void();
}

@ -3654,6 +3715,7 @@ ACTOR Future<Void> monitorPurgeKeys(Reference<BlobManagerData> self) {
// TODO: replace 10000 with a knob
state RangeResult purgeIntents = wait(tr->getRange(blobGranulePurgeKeys, BUGGIFY ? 1 : 10000));
if (purgeIntents.size()) {
CODE_PROBE(true, "BM found purges to process");
int rangeIdx = 0;
for (; rangeIdx < purgeIntents.size(); ++rangeIdx) {
Version purgeVersion;

@ -3675,8 +3737,9 @@ ACTOR Future<Void> monitorPurgeKeys(Reference<BlobManagerData> self) {
}
purgeMap.insert(range, std::make_pair(purgeVersion, force));

if (BM_DEBUG) {
fmt::print("about to purge range [{0} - {1}) @ {2}, force={3}\n",
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} about to purge range [{1} - {2}) @ {3}, force={4}\n",
self->epoch,
range.begin.printable(),
range.end.printable(),
purgeVersion,

@ -3728,9 +3791,11 @@ ACTOR Future<Void> monitorPurgeKeys(Reference<BlobManagerData> self) {
}
}

if (BM_DEBUG) {
printf("Done clearing current set of purge intents.\n");
if (BM_PURGE_DEBUG) {
fmt::print("BM {0} Done clearing current set of purge intents.\n", self->epoch);
}

CODE_PROBE(true, "BM finished processing purge intents");
}
}
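For orientation, purgeRange amounts to a breadth-first traversal of the granule-history DAG: seed the queue with the latest history entry of each active range, classify every node against purgeVersion (fully deletable if its whole lifetime ends at or before the purge point or force is set, partially deletable if it merely started before it), and enqueue each node's parents, deduplicating by key contents as fixed above. A compressed sketch of that shape with simplified types — not the actual FDB code:

#include <cstdint>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

using Version = int64_t;
using NodeId = std::pair<std::string, Version>; // (begin key, start version)

struct HistoryNode {
	Version startVersion, endVersion;
	std::vector<NodeId> parents;
};

void classify(const std::map<NodeId, HistoryNode>& history,
              const std::vector<NodeId>& roots,
              Version purgeVersion,
              bool force,
              std::vector<NodeId>& toFullyDelete,
              std::vector<NodeId>& toPartiallyDelete) {
	std::set<NodeId> visited(roots.begin(), roots.end());
	std::queue<NodeId> q;
	for (auto& r : roots)
		q.push(r);
	while (!q.empty()) {
		NodeId id = q.front();
		q.pop();
		auto it = history.find(id);
		if (it == history.end())
			continue; // no history for this node, skip (mirrors the real code)
		const HistoryNode& n = it->second;
		if (force || n.endVersion <= purgeVersion)
			toFullyDelete.push_back(id); // nothing from this granule survives the purge
		else if (n.startVersion < purgeVersion)
			toPartiallyDelete.push_back(id); // keep files >= purgeVersion, drop older ones
		for (auto& p : n.parents)
			if (visited.insert(p).second) // content-based dedup, as in the fixed visited set
				q.push(p);
	}
}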
@ -602,7 +602,20 @@ ACTOR Future<BlobFileIndex> writeDeltaFile(Reference<BlobWorkerData> bwData,

state std::string fileName = randomBGFilename(bwData->id, granuleID, currentDeltaVersion, ".delta");

state Value serialized = ObjectWriter::toValue(deltasToWrite, Unversioned());
state Optional<BlobGranuleCipherKeysCtx> cipherKeysCtx;
state Optional<BlobGranuleCipherKeysMeta> cipherKeysMeta;
state Arena arena;
// TODO support encryption, figure out proper state stuff
/*if (isBlobFileEncryptionSupported()) {
BlobGranuleCipherKeysCtx ciphKeysCtx = wait(getLatestGranuleCipherKeys(bwData, keyRange, &arena));
cipherKeysCtx = ciphKeysCtx;
cipherKeysMeta = BlobGranuleCipherKeysCtx::toCipherKeysMeta(cipherKeysCtx.get());
}*/

Optional<CompressionFilter> compressFilter = getBlobFileCompressFilter();

state Value serialized = serializeChunkedDeltaFile(
deltasToWrite, keyRange, SERVER_KNOBS->BG_DELTA_FILE_TARGET_CHUNK_BYTES, compressFilter, cipherKeysCtx);
state size_t serializedSize = serialized.size();

// Free up deltasToWrite here to reduce memory

@ -640,7 +653,7 @@ ACTOR Future<BlobFileIndex> writeDeltaFile(Reference<BlobWorkerData> bwData,

Key dfKey = blobGranuleFileKeyFor(granuleID, currentDeltaVersion, 'D');
// TODO change once we support file multiplexing
Value dfValue = blobGranuleFileValueFor(fname, 0, serializedSize, serializedSize);
Value dfValue = blobGranuleFileValueFor(fname, 0, serializedSize, serializedSize, cipherKeysMeta);
tr->set(dfKey, dfValue);

if (oldGranuleComplete.present()) {

@ -668,7 +681,7 @@ ACTOR Future<BlobFileIndex> writeDeltaFile(Reference<BlobWorkerData> bwData,
wait(delay(deterministicRandom()->random01()));
}
// FIXME: change when we implement multiplexing
return BlobFileIndex(currentDeltaVersion, fname, 0, serializedSize, serializedSize);
return BlobFileIndex(currentDeltaVersion, fname, 0, serializedSize, serializedSize, cipherKeysMeta);
} catch (Error& e) {
wait(tr->onError(e));
}

@ -753,8 +766,8 @@ ACTOR Future<BlobFileIndex> writeSnapshot(Reference<BlobWorkerData> bwData,
}

Optional<CompressionFilter> compressFilter = getBlobFileCompressFilter();
state Value serialized =
serializeChunkedSnapshot(snapshot, SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_CHUNKS, compressFilter, cipherKeysCtx);
state Value serialized = serializeChunkedSnapshot(
snapshot, SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_CHUNK_BYTES, compressFilter, cipherKeysCtx);
state size_t serializedSize = serialized.size();

// free snapshot to reduce memory

@ -970,6 +983,7 @@ ACTOR Future<BlobFileIndex> compactFromBlob(Reference<BlobWorkerData> bwData,
snapshotF.cipherKeysMeta);

// TODO: optimization - batch 'encryption-key' lookup given the GranuleFile set is known
// FIXME: get cipher keys for delta as well!
if (chunk.snapshotFile.get().cipherKeysMetaRef.present()) {
ASSERT(isBlobFileEncryptionSupported());
BlobGranuleCipherKeysCtx cipherKeysCtx =

@ -3187,6 +3201,8 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
getGranuleCipherKeys(bwData, chunk.snapshotFile.get().cipherKeysMetaRef.get(), &rep.arena);
}

// FIXME: get cipher keys for delta files too!

// new deltas (if version is larger than version of last delta file)
// FIXME: do trivial key bounds here if key range is not fully contained in request key
// range
@ -368,3 +368,15 @@ Future<Void> MovableCoordinatedState::setExclusive(Value v) {
Future<Void> MovableCoordinatedState::move(ClusterConnectionString const& nc) {
return MovableCoordinatedStateImpl::move(impl.get(), nc);
}

Optional<Value> updateCCSInMovableValue(ValueRef movableVal, KeyRef oldClusterKey, KeyRef newClusterKey) {
Optional<Value> result;
MovableValue moveVal = BinaryReader::fromStringRef<MovableValue>(
movableVal, IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2()));
if (moveVal.other.present() && moveVal.other.get().startsWith(oldClusterKey)) {
TraceEvent(SevDebug, "UpdateCCSInMovableValue").detail("OldConnectionString", moveVal.other.get());
moveVal.other = moveVal.other.get().removePrefix(oldClusterKey).withPrefix(newClusterKey);
result = BinaryWriter::toValue(moveVal, IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2()));
}
return result;
}
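updateCCSInMovableValue only rewrites the embedded connection string when it starts with the old cluster key, and the rewrite is a plain prefix swap via removePrefix/withPrefix. The same operation over std::string, as a minimal sketch with hypothetical values:

#include <cassert>
#include <string>

// swapPrefix("oldDesc:oldID@127.0.0.1:4701", "oldDesc:oldID", "newDesc:newID")
//   == "newDesc:newID@127.0.0.1:4701"
std::string swapPrefix(const std::string& value, const std::string& oldKey, const std::string& newKey) {
	assert(value.rfind(oldKey, 0) == 0); // caller already checked startsWith(oldKey)
	return newKey + value.substr(oldKey.size());
}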
@ -776,3 +776,78 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder,
throw;
}
}

ACTOR Future<Void> changeClusterDescription(std::string datafolder, KeyRef newClusterKey, KeyRef oldClusterKey) {
state UID myID = deterministicRandom()->randomUniqueID();
state OnDemandStore store(datafolder, myID, "coordination-");
RangeResult res = wait(store->readRange(allKeys));
// Context: in the coordinators' kv-store,
// the cluster description and the random id always appear together as the clusterKey.
// The old cluster key (call it oldCKey) below can appear in the following scenarios:
// 1. oldCKey is a key in the store: the value is a binary format of _GenerationRegVal_ which contains a different
// clusterKey (either movedFrom or moveTo)
// 2. oldCKey appears in a key for a forwarding message:
// 2.1: the prefix is _fwdKeys.begin_: the value is the new connection string
// 2.2: the prefix is _fwdTimeKeys.begin_: the value is the time
// 3. oldCKey does not appear in any keys but in a value:
// 3.1: it is in the value of a forwarding message (see 2.1)
// 3.2: it is inside the value of _GenerationRegVal_ (see 1), which is a cluster connection string.
// It seems that even if we do not change it the cluster should still be good, but to be safe we still update it.
for (auto& [key, value] : res) {
if (key.startsWith(fwdKeys.begin)) {
if (key.removePrefix(fwdKeys.begin) == oldClusterKey) {
store->clear(singleKeyRange(key));
store->set(KeyValueRef(newClusterKey.withPrefix(fwdKeys.begin), value));
} else if (value.startsWith(oldClusterKey)) {
store->set(KeyValueRef(key, value.removePrefix(oldClusterKey).withPrefix(newClusterKey)));
}
} else if (key.startsWith(fwdTimeKeys.begin) && key.removePrefix(fwdTimeKeys.begin) == oldClusterKey) {
store->clear(singleKeyRange(key));
store->set(KeyValueRef(newClusterKey.withPrefix(fwdTimeKeys.begin), value));
} else if (key == oldClusterKey) {
store->clear(singleKeyRange(key));
store->set(KeyValueRef(newClusterKey, value));
} else {
// parse the value part
GenerationRegVal regVal = BinaryReader::fromStringRef<GenerationRegVal>(value, IncludeVersion());
if (regVal.val.present()) {
Optional<Value> newVal = updateCCSInMovableValue(regVal.val.get(), oldClusterKey, newClusterKey);
if (newVal.present()) {
regVal.val = newVal.get();
store->set(KeyValueRef(
key, BinaryWriter::toValue(regVal, IncludeVersion(ProtocolVersion::withGenerationRegVal()))));
}
}
}
}
wait(store->commit());
return Void();
}

Future<Void> coordChangeClusterKey(std::string dataFolder, KeyRef newClusterKey, KeyRef oldClusterKey) {
TraceEvent(SevInfo, "CoordChangeClusterKey")
.detail("DataFolder", dataFolder)
.detail("NewClusterKey", newClusterKey)
.detail("OldClusterKey", oldClusterKey);
std::string absDataFolder = abspath(dataFolder);
std::vector<std::string> returnList = platform::listDirectories(absDataFolder);
std::vector<Future<Void>> futures;
for (const auto& dirEntry : returnList) {
if (dirEntry == "." || dirEntry == "..") {
continue;
}
std::string processDir = dataFolder + "/" + dirEntry;
TraceEvent(SevInfo, "UpdatingCoordDataForProcess").detail("ProcessDataDir", processDir);
std::vector<std::string> returnFiles = platform::listFiles(processDir, "");
bool isCoord = false;
for (const auto& fileEntry : returnFiles) {
if (fileEntry.rfind("coordination-", 0) == 0) {
isCoord = true;
}
}
if (!isCoord)
continue;
futures.push_back(changeClusterDescription(processDir, newClusterKey, oldClusterKey));
}
return waitForAll(futures);
}
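coordChangeClusterKey treats any process directory containing a file named coordination-* as coordinator state and rewrites each such store in parallel; directories without one are skipped. A hedged usage sketch — the tool or server entry point that calls this is outside this diff, and the folder and keys below are hypothetical:

// Run only while the cluster is stopped, since it rewrites coordinated state on disk in place.
// ("..."_sr is assumed here as the StringRef literal helper.)
wait(coordChangeClusterKey("/var/fdb/data", "newDesc:newID"_sr, "oldDesc:oldID"_sr));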
@ -41,6 +41,7 @@
#include "fdbserver/TenantCache.h"
#include "fdbserver/TLogInterface.h"
#include "fdbserver/WaitFailure.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ActorCollection.h"
#include "flow/Arena.h"
#include "flow/BooleanParam.h"

@ -290,6 +291,7 @@ ACTOR Future<Void> pollMoveKeysLock(Database cx, MoveKeysLock lock, const DDEnab
}

struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
public:
Reference<AsyncVar<ServerDBInfo> const> dbInfo;
UID ddId;
PromiseStream<Future<Void>> addActor;
@ -311,7 +313,9 @@ struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
// fully-functional.
DDTeamCollection* teamCollection;
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure;
PromiseStream<RelocateShard> relocationProducer, relocationConsumer; // consumer is a yield stream from producer
// consumer is a yield stream from producer. The RelocateShard is pushed into relocationProducer and popped from
// relocationConsumer (by DDQueue)
PromiseStream<RelocateShard> relocationProducer, relocationConsumer;

DataDistributor(Reference<AsyncVar<ServerDBInfo> const> const& db, UID id)
: dbInfo(db), ddId(id), txnProcessor(nullptr), initialDDEventHolder(makeReference<EventCacheHolder>("InitialDD")),

@ -436,11 +440,7 @@ struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
return Void();
}

// Resume inflight relocations from the previous DD
// TODO: add a test to verify the inflight relocation correctness and measure the memory usage with 4 million shards
ACTOR static Future<Void> resumeRelocations(Reference<DataDistributor> self) {
ASSERT(self->shardsAffectedByTeamFailure); // has to be allocated

ACTOR static Future<Void> resumeFromShards(Reference<DataDistributor> self, bool traceShard) {
state int shard = 0;
for (; shard < self->initData->shards.size() - 1; shard++) {
const DDShardInfo& iShard = self->initData->shards[shard];

@ -452,8 +452,8 @@ struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
if (self->configuration.usableRegions > 1) {
teams.push_back(ShardsAffectedByTeamFailure::Team(iShard.remoteSrc, false));
}
if (g_network->isSimulated()) {
TraceEvent("DDInitShard")
if (traceShard) {
TraceEvent(SevDebug, "DDInitShard")
.detail("Keys", keys)
.detail("PrimarySrc", describe(iShard.primarySrc))
.detail("RemoteSrc", describe(iShard.remoteSrc))

@ -472,20 +472,27 @@ struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
if (!unhealthy && self->configuration.usableRegions > 1) {
unhealthy = iShard.remoteSrc.size() != self->configuration.storageTeamSize;
}
self->relocationProducer.send(RelocateShard(keys,
unhealthy ? SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY
: SERVER_KNOBS->PRIORITY_RECOVER_MOVE,
RelocateReason::OTHER));
self->relocationProducer.send(
RelocateShard(keys,
unhealthy ? DataMovementReason::TEAM_UNHEALTHY : DataMovementReason::RECOVER_MOVE,
RelocateReason::OTHER));
}

wait(yield(TaskPriority::DataDistribution));
}
return Void();
}

// TODO: unit test needed
ACTOR static Future<Void> resumeFromDataMoves(Reference<DataDistributor> self, Future<Void> readyToStart) {
state KeyRangeMap<std::shared_ptr<DataMove>>::iterator it = self->initData->dataMoveMap.ranges().begin();

wait(readyToStart);

for (; it != self->initData->dataMoveMap.ranges().end(); ++it) {
const DataMoveMetaData& meta = it.value()->meta;
if (it.value()->isCancelled() || (it.value()->valid && !CLIENT_KNOBS->SHARD_ENCODE_LOCATION_METADATA)) {
RelocateShard rs(meta.range, SERVER_KNOBS->PRIORITY_RECOVER_MOVE, RelocateReason::OTHER);
RelocateShard rs(meta.range, DataMovementReason::RECOVER_MOVE, RelocateReason::OTHER);
rs.dataMoveId = meta.id;
rs.cancelled = true;
self->relocationProducer.send(rs);

@ -494,7 +501,7 @@ struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
TraceEvent(SevDebug, "DDInitFoundDataMove", self->ddId).detail("DataMove", meta.toString());
ASSERT(meta.range == it.range());
// TODO: Persist priority in DataMoveMetaData.
RelocateShard rs(meta.range, SERVER_KNOBS->PRIORITY_RECOVER_MOVE, RelocateReason::OTHER);
RelocateShard rs(meta.range, DataMovementReason::RECOVER_MOVE, RelocateReason::OTHER);
rs.dataMoveId = meta.id;
rs.dataMove = it.value();
std::vector<ShardsAffectedByTeamFailure::Team> teams;

@ -517,6 +524,16 @@ struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
}
return Void();
}

// Resume inflight relocations from the previous DD
// TODO: The initialDataDistribution is unused once resumeRelocations and
// DataDistributionTracker::trackInitialShards are done. In the future, we can release the object to save memory
// usage if it turns out to be a problem.
Future<Void> resumeRelocations() {
ASSERT(shardsAffectedByTeamFailure); // has to be allocated
Future<Void> shardsReady = resumeFromShards(Reference<DataDistributor>::addRef(this), g_network->isSimulated());
return resumeFromDataMoves(Reference<DataDistributor>::addRef(this), shardsReady);
}
};

// Runs the data distribution algorithm for FDB, including the DD Queue, DD tracker, and DD team collection
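resumeRelocations is now a plain member that sequences two actors — resumeFromShards, then resumeFromDataMoves gated on its completion — which is what lets the test at the bottom of this file drive resumeFromShards in isolation. Reference<DataDistributor>::addRef(this) hands each actor an owning reference so the object outlives its caller. The lifetime idiom in miniature (a sketch, not FDB's actual classes):

struct Worker : ReferenceCounted<Worker> {
	Future<Void> run() {
		// addRef bumps the refcount, so the actors keep *this alive even if the
		// original Reference<Worker> is dropped while they are still running.
		Future<Void> first = stepOne(Reference<Worker>::addRef(this));
		return stepTwo(Reference<Worker>::addRef(this), first);
	}
};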
@ -564,18 +581,19 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
state Reference<AsyncVar<bool>> processingUnhealthy(new AsyncVar<bool>(false));
state Reference<AsyncVar<bool>> processingWiggle(new AsyncVar<bool>(false));
state Promise<Void> readyToStart;

self->shardsAffectedByTeamFailure = makeReference<ShardsAffectedByTeamFailure>();
wait(DataDistributor::resumeRelocations(self));
wait(self->resumeRelocations());

std::vector<TeamCollectionInterface> tcis;
std::vector<TeamCollectionInterface> tcis; // primary and remote region interface
Reference<AsyncVar<bool>> anyZeroHealthyTeams; // true if primary or remote has zero healthy team
std::vector<Reference<AsyncVar<bool>>> zeroHealthyTeams; // primary and remote

Reference<AsyncVar<bool>> anyZeroHealthyTeams;
std::vector<Reference<AsyncVar<bool>>> zeroHealthyTeams;
tcis.push_back(TeamCollectionInterface());
zeroHealthyTeams.push_back(makeReference<AsyncVar<bool>>(true));
int storageTeamSize = self->configuration.storageTeamSize;

std::vector<Future<Void>> actors;
std::vector<Future<Void>> actors; // the container of ACTORs
if (self->configuration.usableRegions > 1) {
tcis.push_back(TeamCollectionInterface());
storageTeamSize = 2 * self->configuration.storageTeamSize;

@ -1379,6 +1397,16 @@ static Future<ErrorOr<Void>> badTestFuture(double duration, Error e) {
return tag(delay(duration), ErrorOr<Void>(e));
}

inline DDShardInfo doubleToNoLocationShardInfo(double d, bool hasDest) {
DDShardInfo res(doubleToTestKey(d), anonymousShardId, anonymousShardId);
res.primarySrc.emplace_back((uint64_t)d, 0);
if (hasDest) {
res.primaryDest.emplace_back((uint64_t)d + 1, 0);
res.hasDest = true;
}
return res;
}

} // namespace data_distribution_test

TEST_CASE("/DataDistribution/WaitForMost") {

@ -1440,3 +1468,44 @@ TEST_CASE("/DataDistributor/StorageWiggler/Order") {
ASSERT(!wiggler.getNextServerId().present());
return Void();
}

TEST_CASE("/DataDistributor/Initialization/ResumeFromShard") {
state Reference<AsyncVar<ServerDBInfo> const> dbInfo;
state Reference<DataDistributor> self(new DataDistributor(dbInfo, UID()));

self->shardsAffectedByTeamFailure = makeReference<ShardsAffectedByTeamFailure>();
self->initData = makeReference<InitialDataDistribution>();
self->configuration.usableRegions = 1;
self->configuration.storageTeamSize = 1;

// add DDShardInfo
self->shardsAffectedByTeamFailure->setCheckMode(
ShardsAffectedByTeamFailure::CheckMode::ForceNoCheck); // skip check while building
int shardNum = deterministicRandom()->randomInt(1000, CLIENT_KNOBS->TOO_MANY * 5); // 2000000000; OOM
std::cout << "generating " << shardNum << " shards...\n";
for (int i = 1; i <= SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM; ++i) {
self->initData->shards.emplace_back(data_distribution_test::doubleToNoLocationShardInfo(i, true));
}
for (int i = SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM + 1; i <= shardNum; ++i) {
self->initData->shards.emplace_back(data_distribution_test::doubleToNoLocationShardInfo(i, false));
}
self->initData->shards.emplace_back(DDShardInfo(allKeys.end));
std::cout << "Start resuming...\n";
wait(DataDistributor::resumeFromShards(self, false));
std::cout << "Start validation...\n";
auto relocateFuture = self->relocationProducer.getFuture();
for (int i = 0; i < SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM; ++i) {
ASSERT(relocateFuture.isReady());
auto rs = relocateFuture.pop();
ASSERT(rs.isRestore() == false);
ASSERT(rs.cancelled == false);
ASSERT(rs.dataMoveId == anonymousShardId);
ASSERT(rs.priority == SERVER_KNOBS->PRIORITY_RECOVER_MOVE);
// std::cout << rs.keys.begin.toString() << " " << self->initData->shards[i].key.toString() << " \n";
ASSERT(rs.keys.begin.compare(self->initData->shards[i].key) == 0);
ASSERT(rs.keys.end == self->initData->shards[i + 1].key);
}
self->shardsAffectedByTeamFailure->setCheckMode(ShardsAffectedByTeamFailure::CheckMode::ForceCheck);
self->shardsAffectedByTeamFailure->check();
return Void();
}
@ -41,12 +41,6 @@
typedef Reference<IDataDistributionTeam> ITeamRef;
typedef std::pair<ITeamRef, ITeamRef> SrcDestTeamPair;

// FIXME: Always use DataMovementReason to invoke these functions.
inline bool isDiskRebalancePriority(int priority) {
return priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM ||
priority == SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM;
}

inline bool isDataMovementForDiskBalancing(DataMovementReason reason) {
return reason == DataMovementReason::REBALANCE_UNDERUTILIZED_TEAM ||
reason == DataMovementReason::REBALANCE_OVERUTILIZED_TEAM;

@ -57,16 +51,12 @@ inline bool isDataMovementForReadBalancing(DataMovementReason reason) {
reason == DataMovementReason::REBALANCE_READ_UNDERUTIL_TEAM;
}

inline bool isMountainChopperPriority(int priority) {
return priority == SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM ||
priority == SERVER_KNOBS->PRIORITY_REBALANCE_READ_OVERUTIL_TEAM;
}

inline bool isDataMovementForMountainChopper(DataMovementReason reason) {
return reason == DataMovementReason::REBALANCE_OVERUTILIZED_TEAM ||
reason == DataMovementReason::REBALANCE_READ_OVERUTIL_TEAM;
}

// FIXME: Always use DataMovementReason to invoke these functions.
inline bool isValleyFillerPriority(int priority) {
return priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM ||
priority == SERVER_KNOBS->PRIORITY_REBALANCE_READ_UNDERUTIL_TEAM;

@ -80,6 +70,9 @@ inline bool isDataMovementForValleyFiller(DataMovementReason reason) {
int dataMovementPriority(DataMovementReason reason) {
int priority;
switch (reason) {
case DataMovementReason::INVALID:
priority = -1;
break;
case DataMovementReason::RECOVER_MOVE:
priority = SERVER_KNOBS->PRIORITY_RECOVER_MOVE;
break;
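The running theme in this file: call sites stop passing raw SERVER_KNOBS->PRIORITY_* integers and instead pass a DataMovementReason, with the priority derived in exactly one place by dataMovementPriority() above. The constructor shape this implies for RelocateShard, as a simplified sketch (the real declaration lives in the DD headers, not in this hunk):

struct RelocateShard {
	KeyRange keys;
	int priority;                  // still consumed by the queue's ordering logic
	DataMovementReason moveReason; // now the source of truth, see RelocateData below
	RelocateReason reason;

	RelocateShard(KeyRange keys, DataMovementReason moveReason, RelocateReason reason)
	  : keys(keys), priority(dataMovementPriority(moveReason)), moveReason(moveReason), reason(reason) {}
};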
@ -162,9 +155,9 @@ struct RelocateData {
: keys(rs.keys), priority(rs.priority), boundaryPriority(isBoundaryPriority(rs.priority) ? rs.priority : -1),
healthPriority(isHealthPriority(rs.priority) ? rs.priority : -1), reason(rs.reason), startTime(now()),
randomId(deterministicRandom()->randomUniqueID()), dataMoveId(rs.dataMoveId), workFactor(0),
wantsNewServers(isMountainChopperPriority(rs.priority) || isValleyFillerPriority(rs.priority) ||
rs.priority == SERVER_KNOBS->PRIORITY_SPLIT_SHARD ||
rs.priority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT),
wantsNewServers(
isDataMovementForMountainChopper(rs.moveReason) || isDataMovementForValleyFiller(rs.moveReason) ||
rs.moveReason == DataMovementReason::SPLIT_SHARD || rs.moveReason == DataMovementReason::TEAM_REDUNDANT),
cancellable(true), interval("QueuedRelocation"), dataMove(rs.dataMove) {
if (dataMove != nullptr) {
this->src.insert(this->src.end(), dataMove->meta.src.begin(), dataMove->meta.src.end());

@ -813,7 +806,6 @@ struct DDQueueData {
}

ACTOR static Future<Void> getSourceServersForRange(DDQueueData* self,
Database cx,
RelocateData input,
PromiseStream<RelocateData> output,
Reference<FlowLock> fetchLock) {

@ -929,7 +921,7 @@ struct DDQueueData {

fetchingSourcesQueue.insert(rrs);
getSourceActors.insert(
rrs.keys, getSourceServersForRange(this, cx, rrs, fetchSourceServersComplete, fetchSourceLock));
rrs.keys, getSourceServersForRange(this, rrs, fetchSourceServersComplete, fetchSourceLock));
} else {
RelocateData newData(rrs);
newData.keys = affectedQueuedItems[r];

@ -1739,7 +1731,7 @@ inline double getWorstCpu(const HealthMetrics& metrics, const std::vector<UID>&
// Move the shard with the top K highest read density of sourceTeam's to destTeam if sourceTeam has much more read load
// than destTeam
ACTOR Future<bool> rebalanceReadLoad(DDQueueData* self,
int priority,
DataMovementReason moveReason,
Reference<IDataDistributionTeam> sourceTeam,
Reference<IDataDistributionTeam> destTeam,
bool primary,

@ -1807,7 +1799,7 @@ ACTOR Future<bool> rebalanceReadLoad(DDQueueData* self,
ShardsAffectedByTeamFailure::Team(sourceTeam->getServerIDs(), primary));
for (int i = 0; i < shards.size(); i++) {
if (shard == shards[i]) {
self->output.send(RelocateShard(shard, priority, RelocateReason::REBALANCE_READ));
self->output.send(RelocateShard(shard, moveReason, RelocateReason::REBALANCE_READ));
self->updateLastAsSource(sourceTeam->getServerIDs());
return true;
}

@ -1818,7 +1810,7 @@ ACTOR Future<bool> rebalanceReadLoad(DDQueueData* self,

// Move a random shard from sourceTeam if sourceTeam has much more data than provided destTeam
ACTOR static Future<bool> rebalanceTeams(DDQueueData* self,
int priority,
DataMovementReason moveReason,
Reference<IDataDistributionTeam const> sourceTeam,
Reference<IDataDistributionTeam const> destTeam,
bool primary,

@ -1879,7 +1871,7 @@ ACTOR static Future<bool> rebalanceTeams(DDQueueData* self,
ShardsAffectedByTeamFailure::Team(sourceTeam->getServerIDs(), primary));
for (int i = 0; i < shards.size(); i++) {
if (moveShard == shards[i]) {
self->output.send(RelocateShard(moveShard, priority, RelocateReason::REBALANCE_DISK));
self->output.send(RelocateShard(moveShard, moveReason, RelocateReason::REBALANCE_DISK));
return true;
}
}

@ -2008,9 +2000,9 @@ ACTOR Future<Void> BgDDLoadRebalance(DDQueueData* self, int teamCollectionIndex,
// clang-format off
if (sourceTeam.isValid() && destTeam.isValid()) {
if (readRebalance) {
wait(store(moved,rebalanceReadLoad(self, ddPriority, sourceTeam, destTeam, teamCollectionIndex == 0, &traceEvent)));
wait(store(moved,rebalanceReadLoad(self, reason, sourceTeam, destTeam, teamCollectionIndex == 0, &traceEvent)));
} else {
wait(store(moved,rebalanceTeams(self, ddPriority, sourceTeam, destTeam, teamCollectionIndex == 0, &traceEvent)));
wait(store(moved,rebalanceTeams(self, reason, sourceTeam, destTeam, teamCollectionIndex == 0, &traceEvent)));
}
}
// clang-format on

@ -2106,7 +2098,7 @@ ACTOR Future<Void> BgDDMountainChopper(DDQueueData* self, int teamCollectionInde

if (loadedTeam.first.present()) {
bool _moved = wait(rebalanceTeams(self,
SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM,
DataMovementReason::REBALANCE_OVERUTILIZED_TEAM,
loadedTeam.first.get(),
randomTeam.first.get(),
teamCollectionIndex == 0,

@ -2205,7 +2197,7 @@ ACTOR Future<Void> BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex)

if (unloadedTeam.first.present()) {
bool _moved = wait(rebalanceTeams(self,
SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM,
DataMovementReason::REBALANCE_UNDERUTILIZED_TEAM,
randomTeam.first.get(),
unloadedTeam.first.get(),
teamCollectionIndex == 0,

@ -2267,8 +2259,8 @@ ACTOR Future<Void> dataDistributionQueue(Database cx,

for (int i = 0; i < teamCollections.size(); i++) {
// FIXME: Use BgDDLoadBalance for disk rebalance too after DD simulation test proof.
// balancingFutures.push_back(BgDDLoadRebalance(&self, i, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM));
// balancingFutures.push_back(BgDDLoadRebalance(&self, i, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM));
// balancingFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_OVERUTILIZED_TEAM));
// balancingFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_UNDERUTILIZED_TEAM));
if (SERVER_KNOBS->READ_SAMPLING_ENABLED) {
balancingFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_READ_OVERUTIL_TEAM));
balancingFutures.push_back(BgDDLoadRebalance(&self, i, DataMovementReason::REBALANCE_READ_UNDERUTIL_TEAM));
@ -524,12 +524,12 @@ ACTOR Future<Void> shardSplitter(DataDistributionTracker* self,
for (int i = 0; i < skipRange; i++) {
KeyRangeRef r(splitKeys[i], splitKeys[i + 1]);
self->shardsAffectedByTeamFailure->defineShard(r);
self->output.send(RelocateShard(r, SERVER_KNOBS->PRIORITY_SPLIT_SHARD, RelocateReason::OTHER));
self->output.send(RelocateShard(r, DataMovementReason::SPLIT_SHARD, RelocateReason::OTHER));
}
for (int i = numShards - 1; i > skipRange; i--) {
KeyRangeRef r(splitKeys[i], splitKeys[i + 1]);
self->shardsAffectedByTeamFailure->defineShard(r);
self->output.send(RelocateShard(r, SERVER_KNOBS->PRIORITY_SPLIT_SHARD, RelocateReason::OTHER));
self->output.send(RelocateShard(r, DataMovementReason::SPLIT_SHARD, RelocateReason::OTHER));
}

self->sizeChanges.add(changeSizes(self, keys, shardSize->get().get().metrics.bytes));

@ -675,7 +675,7 @@ Future<Void> shardMerger(DataDistributionTracker* self,
}
restartShardTrackers(self, mergeRange, ShardMetrics(endingStats, lastLowBandwidthStartTime, shardCount));
self->shardsAffectedByTeamFailure->defineShard(mergeRange);
self->output.send(RelocateShard(mergeRange, SERVER_KNOBS->PRIORITY_MERGE_SHARD, RelocateReason::OTHER));
self->output.send(RelocateShard(mergeRange, DataMovementReason::MERGE_SHARD, RelocateReason::OTHER));

// We are about to be cancelled by the call to restartShardTrackers
return Void();

@ -1189,8 +1189,14 @@ void ShardsAffectedByTeamFailure::finishMove(KeyRangeRef keys) {
}
}

void ShardsAffectedByTeamFailure::setCheckMode(CheckMode mode) {
checkMode = mode;
}

void ShardsAffectedByTeamFailure::check() const {
if (EXPENSIVE_VALIDATION) {
if (checkMode == CheckMode::ForceNoCheck)
return;
if (EXPENSIVE_VALIDATION || checkMode == CheckMode::ForceCheck) {
for (auto t = team_shards.begin(); t != team_shards.end(); ++t) {
auto i = shard_teams.rangeContaining(t->second.begin);
if (i->range() != t->second || !std::count(i->value().first.begin(), i->value().first.end(), t->first)) {
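setCheckMode exists for the new /DataDistributor/Initialization/ResumeFromShard test above: ForceNoCheck bypasses check()'s full scan of team_shards while millions of shards are bulk-loaded, and ForceCheck then forces one complete validation pass even in builds where EXPENSIVE_VALIDATION is off. The enum this implies, as an assumption about the header (member order is a guess):

enum class CheckMode { Normal = 0, ForceCheck, ForceNoCheck };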
@ -79,6 +79,10 @@ static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 27) ? ROCKSDB_PATCH >= 3 :
|
|||
namespace {
|
||||
using rocksdb::BackgroundErrorReason;
|
||||
|
||||
struct SharedRocksDBState {
|
||||
bool closing = false;
|
||||
};
|
||||
|
||||
// Returns string representation of RocksDB background error reason.
|
||||
// Error reason code:
|
||||
// https://github.com/facebook/rocksdb/blob/12d798ac06bcce36be703b057d5f5f4dab3b270c/include/rocksdb/listener.h#L125
|
||||
|
@@ -737,6 +741,7 @@ ACTOR Future<Void> flowLockLogger(UID id, const FlowLock* readLock, const FlowLo
}

ACTOR Future<Void> rocksDBMetricLogger(UID id,
std::shared_ptr<SharedRocksDBState> sharedState,
std::shared_ptr<rocksdb::Statistics> statistics,
std::shared_ptr<PerfContextMetrics> perfContextMetrics,
rocksdb::DB* db,

@@ -780,6 +785,7 @@ ACTOR Future<Void> rocksDBMetricLogger(UID id,
{ "CountIterSkippedKeys", rocksdb::NUMBER_ITER_SKIP, 0 },

};

state std::vector<std::pair<const char*, std::string>> intPropertyStats = {
{ "NumImmutableMemtables", rocksdb::DB::Properties::kNumImmutableMemTable },
{ "NumImmutableMemtablesFlushed", rocksdb::DB::Properties::kNumImmutableMemTableFlushed },

@@ -823,6 +829,9 @@ ACTOR Future<Void> rocksDBMetricLogger(UID id,

loop {
wait(delay(SERVER_KNOBS->ROCKSDB_METRICS_DELAY));
if (sharedState->closing) {
break;
}
TraceEvent e("RocksDBMetrics", id);
uint64_t stat;
for (auto& [name, ticker, cum] : tickerStats) {

@@ -873,6 +882,8 @@ ACTOR Future<Void> rocksDBMetricLogger(UID id,
perfContextMetrics->log(true);
}
}

return Void();
}

void logRocksDBError(UID id,

@@ -921,6 +932,8 @@ struct RocksDBKeyValueStore : IKeyValueStore {

DB& db;
CF& cf;
std::unordered_set<rocksdb::ColumnFamilyHandle*> cfHandles;

UID id;
std::shared_ptr<rocksdb::RateLimiter> rateLimiter;
std::shared_ptr<ReadIteratorPool> readIterPool;

@@ -954,15 +967,10 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
}

~Writer() override {
if (db) {
delete db;
}
}

void init() override {}

struct OpenAction : TypedAction<Writer, OpenAction> {
std::shared_ptr<SharedRocksDBState> sharedState;
std::string path;
ThreadReturnPromise<Void> done;
Optional<Future<Void>>& metrics;

@@ -970,14 +978,15 @@ struct RocksDBKeyValueStore : IKeyValueStore {
const FlowLock* fetchLock;
std::shared_ptr<RocksDBErrorListener> errorListener;
Counters& counters;
OpenAction(std::string path,
OpenAction(std::shared_ptr<SharedRocksDBState> sharedState,
std::string path,
Optional<Future<Void>>& metrics,
const FlowLock* readLock,
const FlowLock* fetchLock,
std::shared_ptr<RocksDBErrorListener> errorListener,
Counters& counters)
: path(std::move(path)), metrics(metrics), readLock(readLock), fetchLock(fetchLock),
errorListener(errorListener), counters(counters) {}
: sharedState(sharedState), path(std::move(path)), metrics(metrics), readLock(readLock),
fetchLock(fetchLock), errorListener(errorListener), counters(counters) {}

double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};

@@ -1004,6 +1013,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {

std::vector<rocksdb::ColumnFamilyHandle*> handles;
status = rocksdb::DB::Open(options, a.path, descriptors, &handles, &db);
cfHandles.insert(handles.begin(), handles.end());

if (!status.ok()) {
logRocksDBError(id, status, "Open");

@@ -1020,6 +1030,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {

if (cf == nullptr) {
status = db->CreateColumnFamily(cfOptions, SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, &cf);
cfHandles.insert(cf);
if (!status.ok()) {
logRocksDBError(id, status, "Open");
a.done.sendError(statusToError(status));

@@ -1037,13 +1048,20 @@ struct RocksDBKeyValueStore : IKeyValueStore {
// The current thread and main thread are same when the code runs in simulation.
// blockUntilReady() is getting the thread into deadlock state, so directly calling
// the metricsLogger.
a.metrics = rocksDBMetricLogger(
id, options.statistics, perfContextMetrics, db, readIterPool, &a.counters, cf) &&
flowLockLogger(id, a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool);
a.metrics =
rocksDBMetricLogger(
id, a.sharedState, options.statistics, perfContextMetrics, db, readIterPool, &a.counters, cf) &&
flowLockLogger(id, a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool);
} else {
onMainThread([&] {
a.metrics = rocksDBMetricLogger(
id, options.statistics, perfContextMetrics, db, readIterPool, &a.counters, cf) &&
a.metrics = rocksDBMetricLogger(id,
a.sharedState,
options.statistics,
perfContextMetrics,
db,
readIterPool,
&a.counters,
cf) &&
flowLockLogger(id, a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool);
return Future<bool>(true);
}).blockUntilReady();

@@ -1182,6 +1200,12 @@ struct RocksDBKeyValueStore : IKeyValueStore {
a.done.send(Void());
return;
}
for (rocksdb::ColumnFamilyHandle* handle : cfHandles) {
if (handle != nullptr) {
db->DestroyColumnFamilyHandle(handle);
}
}
cfHandles.clear();
auto s = db->Close();
if (!s.ok()) {
logRocksDBError(id, s, "Close");
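The new cfHandles set records every rocksdb::ColumnFamilyHandle the store obtains (from DB::Open, CreateColumnFamily, and the restore path) so they can all be released before the DB is closed; RocksDB expects handles to be destroyed via DestroyColumnFamilyHandle() before Close() and delete. An equivalent RAII-style alternative, sketched for illustration only (the diff keeps the explicit loop):

    // Hypothetical guard tying a handle's lifetime to a scope.
    struct CFHandleGuard {
        rocksdb::DB* db = nullptr;
        rocksdb::ColumnFamilyHandle* handle = nullptr;
        ~CFHandleGuard() {
            if (db != nullptr && handle != nullptr) {
                db->DestroyColumnFamilyHandle(handle);
            }
        }
    };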
@@ -1547,35 +1571,9 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
};

DB db = nullptr;
std::shared_ptr<PerfContextMetrics> perfContextMetrics;
std::string path;
rocksdb::ColumnFamilyHandle* defaultFdbCF = nullptr;
UID id;
Reference<IThreadPool> writeThread;
Reference<IThreadPool> readThreads;
std::shared_ptr<RocksDBErrorListener> errorListener;
Future<Void> errorFuture;
Promise<Void> closePromise;
Future<Void> openFuture;
std::unique_ptr<rocksdb::WriteBatch> writeBatch;
Optional<Future<Void>> metrics;
FlowLock readSemaphore;
int numReadWaiters;
FlowLock fetchSemaphore;
int numFetchWaiters;
std::shared_ptr<ReadIteratorPool> readIterPool;
std::vector<std::unique_ptr<ThreadReturnPromiseStream<std::pair<std::string, double>>>> metricPromiseStreams;
// ThreadReturnPromiseStream pair.first stores the histogram name and
// pair.second stores the corresponding measured latency (seconds)
Future<Void> actorErrorListener;
Future<Void> collection;
PromiseStream<Future<Void>> addActor;
Counters counters;

explicit RocksDBKeyValueStore(const std::string& path, UID id)
: path(path), id(id), perfContextMetrics(new PerfContextMetrics()),
readIterPool(new ReadIteratorPool(id, db, defaultFdbCF)),
: sharedState(std::make_shared<SharedRocksDBState>()), path(path), id(id),
perfContextMetrics(new PerfContextMetrics()), readIterPool(new ReadIteratorPool(id, db, defaultFdbCF)),
readSemaphore(SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
fetchSemaphore(SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_SOFT_MAX),
numReadWaiters(SERVER_KNOBS->ROCKSDB_READ_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),

@@ -1730,6 +1728,8 @@ struct RocksDBKeyValueStore : IKeyValueStore {
Future<Void> getError() const override { return errorFuture; }

ACTOR static void doClose(RocksDBKeyValueStore* self, bool deleteOnClose) {
self->sharedState->closing = true;

// The metrics future retains a reference to the DB, so stop it before we delete it.
self->metrics.reset();

@@ -1740,8 +1740,12 @@ struct RocksDBKeyValueStore : IKeyValueStore {
self->writeThread->post(a);
wait(f);
wait(self->writeThread->stop());
if (self->closePromise.canBeSet())
if (self->closePromise.canBeSet()) {
self->closePromise.send(Void());
}
if (self->db != nullptr) {
delete self->db;
}
delete self;
}

@@ -1765,7 +1769,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
return openFuture;
}
auto a = std::make_unique<Writer::OpenAction>(
path, metrics, &readSemaphore, &fetchSemaphore, errorListener, counters);
this->sharedState, path, metrics, &readSemaphore, &fetchSemaphore, errorListener, counters);
openFuture = a->done.getFuture();
writeThread->post(a.release());
return openFuture;
@@ -1978,6 +1982,33 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
return Void();
}

DB db = nullptr;
std::shared_ptr<SharedRocksDBState> sharedState;
std::shared_ptr<PerfContextMetrics> perfContextMetrics;
std::string path;
rocksdb::ColumnFamilyHandle* defaultFdbCF = nullptr;
UID id;
Reference<IThreadPool> writeThread;
Reference<IThreadPool> readThreads;
std::shared_ptr<RocksDBErrorListener> errorListener;
Future<Void> errorFuture;
Promise<Void> closePromise;
Future<Void> openFuture;
std::unique_ptr<rocksdb::WriteBatch> writeBatch;
Optional<Future<Void>> metrics;
FlowLock readSemaphore;
int numReadWaiters;
FlowLock fetchSemaphore;
int numFetchWaiters;
std::shared_ptr<ReadIteratorPool> readIterPool;
std::vector<std::unique_ptr<ThreadReturnPromiseStream<std::pair<std::string, double>>>> metricPromiseStreams;
// ThreadReturnPromiseStream pair.first stores the histogram name and
// pair.second stores the corresponding measured latency (seconds)
Future<Void> actorErrorListener;
Future<Void> collection;
PromiseStream<Future<Void>> addActor;
Counters counters;
};

void RocksDBKeyValueStore::Writer::action(CheckpointAction& a) {
@@ -1987,7 +2018,7 @@ void RocksDBKeyValueStore::Writer::action(CheckpointAction& a) {
.detail("Format", static_cast<int>(a.request.format))
.detail("CheckpointDir", a.request.checkpointDir);

rocksdb::Checkpoint* checkpoint;
rocksdb::Checkpoint* checkpoint = nullptr;
rocksdb::Status s = rocksdb::Checkpoint::Create(db, &checkpoint);
if (!s.ok()) {
logRocksDBError(id, s, "Checkpoint");

@@ -2051,9 +2082,15 @@ void RocksDBKeyValueStore::Writer::action(CheckpointAction& a) {
.detail("RocksSequenceNumber", debugCheckpointSeq)
.detail("CheckpointDir", checkpointDir);
} else {
if (checkpoint != nullptr) {
delete checkpoint;
}
throw not_implemented();
}

if (checkpoint != nullptr) {
delete checkpoint;
}
res.setState(CheckpointMetaData::Complete);
a.reply.send(res);
}
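Initializing checkpoint to nullptr and deleting it on both the not_implemented path and the success path closes a leak of the rocksdb::Checkpoint object. The same ownership could be written with a smart pointer; a sketch of that alternative, shown only for comparison and not what the diff does:

    rocksdb::Checkpoint* raw = nullptr;
    rocksdb::Status s = rocksdb::Checkpoint::Create(db, &raw);
    std::unique_ptr<rocksdb::Checkpoint> checkpoint(raw); // freed on every exit path
    if (!s.ok()) {
        logRocksDBError(id, s, "Checkpoint");
        // error handling as in the original action body
    }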
@@ -2081,6 +2118,8 @@ void RocksDBKeyValueStore::Writer::action(RestoreAction& a) {

if (cf != nullptr) {
ASSERT(db->DropColumnFamily(cf).ok());
db->DestroyColumnFamilyHandle(cf);
cfHandles.erase(cf);
}

rocksdb::ExportImportFilesMetaData metaData = getMetaData(a.checkpoints[0]);

@@ -2088,6 +2127,7 @@ void RocksDBKeyValueStore::Writer::action(RestoreAction& a) {
importOptions.move_files = true;
status = db->CreateColumnFamilyWithImport(
getCFOptions(), SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, importOptions, metaData, &cf);
cfHandles.insert(cf);

if (!status.ok()) {
logRocksDBError(id, status, "Restore");

@@ -2101,6 +2141,7 @@ void RocksDBKeyValueStore::Writer::action(RestoreAction& a) {
} else if (format == RocksDB) {
if (cf == nullptr) {
status = db->CreateColumnFamily(getCFOptions(), SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, &cf);
cfHandles.insert(cf);
TraceEvent("RocksDBServeRestoreRange", id)
.detail("Path", a.path)
.detail("Checkpoint", describe(a.checkpoints));

@@ -2217,7 +2258,7 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/RocksDBBasic") {
}

Future<Void> closed = kvStore->onClosed();
kvStore->close();
kvStore->dispose();
wait(closed);

platform::eraseDirectoryRecursive(rocksDBTestDir);

@@ -2250,7 +2291,7 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/RocksDBReopen") {
ASSERT(Optional<Value>(LiteralStringRef("bar")) == val);

Future<Void> closed = kvStore->onClosed();
kvStore->close();
kvStore->dispose();
wait(closed);

platform::eraseDirectoryRecursive(rocksDBTestDir);

@@ -2295,8 +2336,8 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestoreColumnFamily")
std::vector<Future<Void>> closes;
closes.push_back(kvStore->onClosed());
closes.push_back(kvStoreCopy->onClosed());
kvStore->close();
kvStoreCopy->close();
kvStore->dispose();
kvStoreCopy->dispose();
wait(waitForAll(closes));

platform::eraseDirectoryRecursive(rocksDBTestDir);

@@ -2346,7 +2387,7 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestoreKeyValues") {
std::vector<Future<Void>> closes;
closes.push_back(cpReader->close());
closes.push_back(kvStore->onClosed());
kvStore->close();
kvStore->dispose();
wait(waitForAll(closes));

platform::eraseDirectoryRecursive(rocksDBTestDir);

@@ -110,9 +110,9 @@ class RocksDBErrorListener : public rocksdb::EventListener {
public:
RocksDBErrorListener(){};
void OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status* bg_error) override {
TraceEvent(SevError, "RocksDBBGError")
TraceEvent(SevError, "ShardedRocksDBBGError")
.detail("Reason", getErrorReason(reason))
.detail("RocksDBSeverity", bg_error->severity())
.detail("ShardedRocksDBSeverity", bg_error->severity())
.detail("Status", bg_error->ToString());
std::unique_lock<std::mutex> lock(mutex);
if (!errorPromise.isValid())

@@ -186,8 +186,8 @@ std::vector<std::pair<KeyRange, std::string>> decodeShardMapping(const RangeResu

void logRocksDBError(const rocksdb::Status& status, const std::string& method) {
auto level = status.IsTimedOut() ? SevWarn : SevError;
TraceEvent e(level, "RocksDBError");
e.detail("Error", status.ToString()).detail("Method", method).detail("RocksDBSeverity", status.severity());
TraceEvent e(level, "ShardedRocksDBError");
e.detail("Error", status.ToString()).detail("Method", method).detail("ShardedRocksDBSeverity", status.severity());
if (status.IsIOError()) {
e.detail("SubCode", status.subcode());
}

@@ -219,7 +219,7 @@ const char* ShardOpToString(ShardOp op) {
}
}
void logShardEvent(StringRef name, ShardOp op, Severity severity = SevInfo, const std::string& message = "") {
TraceEvent e(severity, "KVSShardEvent");
TraceEvent e(severity, "ShardedRocksKVSShardEvent");
e.detail("Name", name).detail("Action", ShardOpToString(op));
if (!message.empty()) {
e.detail("Message", message);

@@ -230,7 +230,7 @@ void logShardEvent(StringRef name,
ShardOp op,
Severity severity = SevInfo,
const std::string& message = "") {
TraceEvent e(severity, "KVSShardEvent");
TraceEvent e(severity, "ShardedRocksKVSShardEvent");
e.detail("Name", name).detail("Action", ShardOpToString(op)).detail("Begin", range.begin).detail("End", range.end);
if (message != "") {
e.detail("Message", message);

@@ -343,7 +343,7 @@ public:
ASSERT(cf);
readRangeOptions.background_purge_on_iterator_cleanup = true;
readRangeOptions.auto_prefix_mode = (SERVER_KNOBS->ROCKSDB_PREFIX_LEN > 0);
TraceEvent(SevDebug, "ReadIteratorPool")
TraceEvent(SevVerbose, "ShardedRocksReadIteratorPool")
.detail("Path", path)
.detail("KnobRocksDBReadRangeReuseIterators", SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS)
.detail("KnobRocksDBPrefixLen", SERVER_KNOBS->ROCKSDB_PREFIX_LEN);

@@ -425,7 +425,7 @@ private:
ACTOR Future<Void> flowLockLogger(const FlowLock* readLock, const FlowLock* fetchLock) {
loop {
wait(delay(SERVER_KNOBS->ROCKSDB_METRICS_DELAY));
TraceEvent e("RocksDBFlowLock");
TraceEvent e("ShardedRocksDBFlowLock");
e.detail("ReadAvailable", readLock->available());
e.detail("ReadActivePermits", readLock->activePermits());
e.detail("ReadWaiters", readLock->waiters());

@@ -588,13 +588,13 @@ public:
if (rState->closing) {
break;
}
TraceEvent(SevInfo, "KVSPhysialShardMetrics")
TraceEvent(SevInfo, "ShardedRocksKVSPhysialShardMetrics")
.detail("NumActiveShards", shardManager->numActiveShards())
.detail("TotalPhysicalShards", shardManager->numPhysicalShards());
}
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled) {
TraceEvent(SevError, "ShardMetricsLoggerError").errorUnsuppressed(e);
TraceEvent(SevError, "ShardedRocksShardMetricsLoggerError").errorUnsuppressed(e);
}
}
return Void();

@@ -602,7 +602,7 @@ public:

rocksdb::Status init() {
// Open instance.
TraceEvent(SevVerbose, "ShardManagerInitBegin", this->logId).detail("DataPath", path);
TraceEvent(SevInfo, "ShardedRocksShardManagerInitBegin", this->logId).detail("DataPath", path);
std::vector<std::string> columnFamilies;
rocksdb::Options options = getOptions();
rocksdb::Status status = rocksdb::DB::ListColumnFamilies(options, path, &columnFamilies);

@@ -632,6 +632,8 @@ public:
}

if (foundMetadata) {
TraceEvent(SevInfo, "ShardedRocksInitLoadPhysicalShards", this->logId)
.detail("PhysicalShardCount", handles.size());
for (auto handle : handles) {
if (handle->GetName() == "kvs-metadata") {
metadataShard = std::make_shared<PhysicalShard>(db, "kvs-metadata", handle);

@@ -639,7 +641,8 @@ public:
physicalShards[handle->GetName()] = std::make_shared<PhysicalShard>(db, handle->GetName(), handle);
}
columnFamilyMap[handle->GetID()] = handle;
TraceEvent(SevInfo, "ShardedRocskDB").detail("FoundShard", handle->GetName()).detail("Action", "Init");
TraceEvent(SevVerbose, "ShardedRocksInitPhysicalShard", this->logId)
.detail("PhysicalShard", handle->GetName());
}
RangeResult metadata;
readRangeInDb(metadataShard.get(), prefixRange(shardMappingPrefix), UINT16_MAX, UINT16_MAX, &metadata);

@@ -647,7 +650,7 @@ public:
std::vector<std::pair<KeyRange, std::string>> mapping = decodeShardMapping(metadata, shardMappingPrefix);

for (const auto& [range, name] : mapping) {
TraceEvent(SevDebug, "ShardedRocksLoadPhysicalShard", this->logId)
TraceEvent(SevVerbose, "ShardedRocksLoadRange", this->logId)
.detail("Range", range)
.detail("PhysicalShard", name);
auto it = physicalShards.find(name);

@@ -662,10 +665,10 @@ public:
activePhysicalShardIds.emplace(name);
}
// TODO: remove unused column families.

} else {
// DB is opened with default shard.
ASSERT(handles.size() == 1);

// Add SpecialKeys range. This range should not be modified.
std::shared_ptr<PhysicalShard> defaultShard = std::make_shared<PhysicalShard>(db, "default", handles[0]);
columnFamilyMap[defaultShard->cf->GetID()] = defaultShard->cf;

@@ -688,7 +691,7 @@ public:
return status;
}
metadataShard->readIterPool->update();
TraceEvent(SevInfo, "InitializeMetaDataShard", this->logId)
TraceEvent(SevInfo, "ShardedRocksInitializeMetaDataShard", this->logId)
.detail("MetadataShardCF", metadataShard->cf->GetID());
}
physicalShards["kvs-metadata"] = metadataShard;

@@ -696,7 +699,7 @@ public:
writeBatch = std::make_unique<rocksdb::WriteBatch>();
dirtyShards = std::make_unique<std::set<PhysicalShard*>>();

TraceEvent(SevDebug, "ShardManagerInitEnd", this->logId).detail("DataPath", path);
TraceEvent(SevInfo, "ShardedRocksShardManagerInitEnd", this->logId).detail("DataPath", path);
return status;
}

@@ -712,7 +715,7 @@ public:

for (auto it = rangeIterator.begin(); it != rangeIterator.end(); ++it) {
if (it.value() == nullptr) {
TraceEvent(SevDebug, "ShardedRocksDB")
TraceEvent(SevVerbose, "ShardedRocksDB")
.detail("Info", "ShardNotFound")
.detail("BeginKey", range.begin)
.detail("EndKey", range.end);

@@ -724,9 +727,10 @@ public:
}

PhysicalShard* addRange(KeyRange range, std::string id) {
TraceEvent(SevVerbose, "ShardedRocksAddRangeBegin", this->logId)
TraceEvent(SevInfo, "ShardedRocksAddRangeBegin", this->logId)
.detail("Range", range)
.detail("PhysicalShardID", id);

// Newly added range should not overlap with any existing range.
auto ranges = dataShardMap.intersectingRanges(range);

@@ -750,7 +754,7 @@ public:

validate();

TraceEvent(SevVerbose, "ShardedRocksAddRangeEnd", this->logId)
TraceEvent(SevInfo, "ShardedRocksAddRangeEnd", this->logId)
.detail("Range", range)
.detail("PhysicalShardID", id);

@@ -758,7 +762,7 @@ public:
}

std::vector<std::string> removeRange(KeyRange range) {
TraceEvent(SevVerbose, "ShardedRocksRemoveRangeBegin", this->logId).detail("Range", range);
TraceEvent(SevInfo, "ShardedRocksRemoveRangeBegin", this->logId).detail("Range", range);

std::vector<std::string> shardIds;
@@ -796,6 +800,7 @@ public:
}
continue;
}

// Range modification could result in more than one segment. Remove the original segment key here.
existingShard->dataShards.erase(shardRange.begin.toString());
if (shardRange.begin < range.begin) {
@@ -826,7 +831,7 @@ public:

validate();

TraceEvent(SevVerbose, "ShardedRocksRemoveRangeEnd", this->logId).detail("Range", range);
TraceEvent(SevInfo, "ShardedRocksRemoveRangeEnd", this->logId).detail("Range", range);

return shardIds;
}

@@ -849,7 +854,7 @@ public:
TraceEvent(SevError, "ShardedRocksDB").detail("Error", "write to non-exist shard").detail("WriteKey", key);
return;
}
TraceEvent(SevVerbose, "ShardManagerPut", this->logId)
TraceEvent(SevVerbose, "ShardedRocksShardManagerPut", this->logId)
.detail("WriteKey", key)
.detail("Value", value)
.detail("MapRange", it.range())

@@ -859,7 +864,9 @@ public:
ASSERT(dirtyShards != nullptr);
writeBatch->Put(it.value()->physicalShard->cf, toSlice(key), toSlice(value));
dirtyShards->insert(it.value()->physicalShard);
TraceEvent(SevVerbose, "ShardManagerPutEnd", this->logId).detail("WriteKey", key).detail("Value", value);
TraceEvent(SevVerbose, "ShardedRocksShardManagerPutEnd", this->logId)
.detail("WriteKey", key)
.detail("Value", value);
}

void clear(KeyRef key) {

@@ -884,7 +891,7 @@ public:
}

void persistRangeMapping(KeyRangeRef range, bool isAdd) {
TraceEvent(SevDebug, "ShardedRocksDB")
TraceEvent(SevDebug, "ShardedRocksDB", this->logId)
.detail("Info", "RangeToPersist")
.detail("BeginKey", range.begin)
.detail("EndKey", range.end);

@@ -902,7 +909,7 @@ public:
writeBatch->Put(metadataShard->cf,
getShardMappingKey(it.range().begin, shardMappingPrefix),
it.value()->physicalShard->id);
TraceEvent(SevDebug, "ShardedRocksDB")
TraceEvent(SevDebug, "ShardedRocksDB", this->logId)
.detail("Action", "PersistRangeMapping")
.detail("BeginKey", it.range().begin)
.detail("EndKey", it.range().end)

@@ -911,7 +918,7 @@ public:
} else {
// Empty range.
writeBatch->Put(metadataShard->cf, getShardMappingKey(it.range().begin, shardMappingPrefix), "");
TraceEvent(SevDebug, "ShardedRocksDB")
TraceEvent(SevDebug, "ShardedRocksDB", this->logId)
.detail("Action", "PersistRangeMapping")
.detail("BeginKey", it.range().begin)
.detail("EndKey", it.range().end)

@@ -921,7 +928,7 @@ public:
}
} else {
writeBatch->Put(metadataShard->cf, getShardMappingKey(range.begin, shardMappingPrefix), "");
TraceEvent(SevDebug, "ShardedRocksDB")
TraceEvent(SevDebug, "ShardedRocksDB", this->logId)
.detail("Action", "PersistRangeMapping")
.detail("RemoveRange", "True")
.detail("BeginKey", range.begin)

@@ -972,7 +979,7 @@ public:
if (!s.ok()) {
logRocksDBError(s, "DestroyDB");
}
TraceEvent("RocksDB").detail("Info", "DBDestroyed");
TraceEvent("ShardedRocksDB", this->logId).detail("Info", "DBDestroyed");
}

rocksdb::DB* getDb() const { return db; }

@@ -997,9 +1004,9 @@ public:
}

void validate() {
TraceEvent(SevVerbose, "ValidateShardManager", this->logId);
TraceEvent(SevVerbose, "ShardedRocksValidateShardManager", this->logId);
for (auto s = dataShardMap.ranges().begin(); s != dataShardMap.ranges().end(); ++s) {
TraceEvent e(SevVerbose, "ValidateDataShardMap", this->logId);
TraceEvent e(SevVerbose, "ShardedRocksValidateDataShardMap", this->logId);
e.detail("Range", s->range());
const DataShard* shard = s->value();
e.detail("ShardAddress", reinterpret_cast<std::uintptr_t>(shard));

@@ -1008,6 +1015,13 @@ public:
} else {
e.detail("Shard", "Empty");
}
if (shard != nullptr) {
ASSERT(shard->range == static_cast<KeyRangeRef>(s->range()));
ASSERT(shard->physicalShard != nullptr);
auto it = shard->physicalShard->dataShards.find(shard->range.begin.toString());
ASSERT(it != shard->physicalShard->dataShards.end());
ASSERT(it->second.get() == shard);
}
}
}

@@ -1338,7 +1352,7 @@ std::shared_ptr<rocksdb::Statistics> RocksDBMetrics::getStatsObjForRocksDB() {
}

void RocksDBMetrics::logStats(rocksdb::DB* db) {
TraceEvent e("RocksDBMetrics");
TraceEvent e("ShardedRocksDBMetrics");
uint64_t stat;
for (auto& [name, ticker, cumulation] : tickerStats) {
stat = stats->getTickerCount(ticker);

@@ -1361,7 +1375,7 @@ void RocksDBMetrics::logStats(rocksdb::DB* db) {
}

void RocksDBMetrics::logMemUsagePerShard(std::string shardName, rocksdb::DB* db) {
TraceEvent e("RocksDBShardMemMetrics");
TraceEvent e("ShardedRocksDBShardMemMetrics");
uint64_t stat;
ASSERT(db != nullptr);
ASSERT(db->GetIntProperty(rocksdb::DB::Properties::kBlockCacheUsage, &stat));

@@ -1387,7 +1401,7 @@ void RocksDBMetrics::setPerfContext(int index) {
}

void RocksDBMetrics::logPerfContext(bool ignoreZeroMetric) {
TraceEvent e("RocksDBPerfContextMetrics");
TraceEvent e("ShardedRocksDBPerfContextMetrics");
e.setMaxEventLength(20000);
for (auto& [name, metric, vals] : perfContextMetrics) {
uint64_t s = 0;

@@ -1650,7 +1664,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
return;
}

TraceEvent(SevInfo, "RocksDB").detail("Method", "Open");
TraceEvent(SevInfo, "ShardedRocksDB").detail("Method", "Open");
a.done.send(Void());
}

@@ -1841,7 +1855,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
} else {
a.shardManager->closeAllShards();
}
TraceEvent(SevInfo, "RocksDB").detail("Method", "Close");
TraceEvent(SevInfo, "ShardedRocksDB").detail("Method", "Close");
a.done.send(Void());
}
};

@@ -1908,7 +1922,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
traceBatch.get().addEvent("GetValueDebug", a.debugID.get().first(), "Reader.Before");
}
if (readBeginTime - a.startTime > readValueTimeout) {
TraceEvent(SevWarn, "RocksDBError")
TraceEvent(SevWarn, "ShardedRocksDBError")
.detail("Error", "Read value request timedout")
.detail("Method", "ReadValueAction")
.detail("Timeout value", readValueTimeout);

@@ -1995,7 +2009,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
"Reader.Before"); //.detail("TaskID", g_network->getCurrentTask());
}
if (readBeginTime - a.startTime > readValuePrefixTimeout) {
TraceEvent(SevWarn, "RocksDBError")
TraceEvent(SevWarn, "ShardedRocksDBError")
.detail("Error", "Read value prefix request timedout")
.detail("Method", "ReadValuePrefixAction")
.detail("Timeout value", readValuePrefixTimeout);

@@ -2080,7 +2094,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
rocksDBMetrics->getReadRangeQueueWaitHistogram(threadIndex)->sampleSeconds(readBeginTime - a.startTime);
}
if (readBeginTime - a.startTime > readRangeTimeout) {
TraceEvent(SevWarn, "KVSReadTimeout")
TraceEvent(SevWarn, "ShardedRocksKVSReadTimeout")
.detail("Error", "Read range request timedout")
.detail("Method", "ReadRangeAction")
.detail("Timeout value", readRangeTimeout);

@@ -2127,10 +2141,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
}
}

Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, "ShardedRocksDBNumShardsInRangeRead"_sr, Histogram::Unit::countLinear)
->sample(numShards);

result.more =
(result.size() == a.rowLimit) || (result.size() == -a.rowLimit) || (accumulatedBytes >= a.byteLimit);
if (result.more) {

@@ -2184,7 +2194,8 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
readThreads = createGenericThreadPool();
}
writeThread->addThread(new Writer(id, 0, shardManager.getColumnFamilyMap(), rocksDBMetrics), "fdb-rocksdb-wr");
TraceEvent("RocksDBReadThreads").detail("KnobRocksDBReadParallelism", SERVER_KNOBS->ROCKSDB_READ_PARALLELISM);
TraceEvent("ShardedRocksDBReadThreads", id)
.detail("KnobRocksDBReadParallelism", SERVER_KNOBS->ROCKSDB_READ_PARALLELISM);
for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) {
readThreads->addThread(new Reader(id, i, rocksDBMetrics), "fdb-rocksdb-re");
}

@@ -2302,7 +2313,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
auto* shard = shardManager.getDataShard(key);
if (shard == nullptr || !shard->physicalShard->initialized()) {
// TODO: read non-exist system key range should not cause an error.
TraceEvent(SevWarnAlways, "ShardedRocksDB")
TraceEvent(SevWarnAlways, "ShardedRocksDB", this->id)
.detail("Detail", "Read non-exist key range")
.detail("ReadKey", key);
return Optional<Value>();

@@ -2330,7 +2341,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
auto* shard = shardManager.getDataShard(key);
if (shard == nullptr || !shard->physicalShard->initialized()) {
// TODO: read non-exist system key range should not cause an error.
TraceEvent(SevWarnAlways, "ShardedRocksDB")
TraceEvent(SevWarnAlways, "ShardedRocksDB", this->id)
.detail("Detail", "Read non-exist key range")
.detail("ReadKey", key);
return Optional<Value>();

@@ -2452,7 +2463,7 @@ IKeyValueStore* keyValueStoreShardedRocksDB(std::string const& path,
#ifdef SSD_ROCKSDB_EXPERIMENTAL
return new ShardedRocksDBKeyValueStore(path, logID);
#else
TraceEvent(SevError, "RocksDBEngineInitFailure").detail("Reason", "Built without RocksDB");
TraceEvent(SevError, "ShardedRocksDBEngineInitFailure").detail("Reason", "Built without RocksDB");
ASSERT(false);
return nullptr;
#endif // SSD_ROCKSDB_EXPERIMENTAL

@@ -21,7 +21,7 @@
#include "fdbserver/RESTKmsConnector.h"

#include "fdbclient/FDBTypes.h"
#include "fdbclient/HTTP.h"
#include "fdbrpc/HTTP.h"
#include "flow/IAsyncFile.h"
#include "fdbserver/KmsConnectorInterface.h"
#include "fdbserver/Knobs.h"

@@ -162,6 +162,7 @@ private:
CF cf;
Key begin;
Key end;
std::vector<rocksdb::ColumnFamilyHandle*> handles;
double readRangeTimeout;
std::unique_ptr<rocksdb::Iterator> cursor;
};

@@ -233,7 +234,6 @@ void RocksDBCheckpointReader::Reader::action(RocksDBCheckpointReader::Reader::Op
descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ name, cfOptions });
}

std::vector<rocksdb::ColumnFamilyHandle*> handles;
status = rocksdb::DB::OpenForReadOnly(options, a.path, descriptors, &handles, &db);

if (!status.ok()) {

@@ -288,6 +288,14 @@ void RocksDBCheckpointReader::Reader::action(RocksDBCheckpointReader::Reader::Cl
return;
}

for (rocksdb::ColumnFamilyHandle* handle : handles) {
if (handle != nullptr) {
TraceEvent("RocksDBCheckpointReaderDestroyCF").detail("Path", a.path).detail("CF", handle->GetName());
db->DestroyColumnFamilyHandle(handle);
}
}
handles.clear();

rocksdb::Status s = db->Close();
if (!s.ok()) {
logRocksDBError(s, "Close");

@@ -385,6 +393,9 @@ ACTOR Future<Void> RocksDBCheckpointReader::doClose(RocksDBCheckpointReader* sel
}

if (self != nullptr) {
if (self->db != nullptr) {
delete self->db;
}
delete self;
}

@@ -284,6 +284,13 @@ class TestConfig {
if (attrib == "blobGranulesEnabled") {
blobGranulesEnabled = strcmp(value.c_str(), "true") == 0;
}
if (attrib == "injectSSTargetedRestart") {
injectTargetedSSRestart = strcmp(value.c_str(), "true") == 0;
}

if (attrib == "injectSSDelay") {
injectSSDelay = strcmp(value.c_str(), "true") == 0;
}
}

ifs.close();

@@ -334,6 +341,8 @@ public:
bool allowDefaultTenant = true;
bool allowDisablingTenants = true;
bool allowCreatingTenants = true;
bool injectTargetedSSRestart = false;
bool injectSSDelay = false;

ConfigDBType getConfigDBType() const { return configDBType; }

@@ -394,7 +403,10 @@ public:
.add("allowDefaultTenant", &allowDefaultTenant)
.add("allowDisablingTenants", &allowDisablingTenants)
.add("allowCreatingTenants", &allowCreatingTenants)
.add("randomlyRenameZoneId", &randomlyRenameZoneId);
.add("randomlyRenameZoneId", &randomlyRenameZoneId)
.add("injectTargetedSSRestart", &injectTargetedSSRestart)
.add("injectSSDelay", &injectSSDelay);
try {
auto file = toml::parse(testFile);
if (file.contains("configuration") && toml::find(file, "configuration").is_table()) {

@@ -1401,7 +1413,7 @@ void SimulationConfig::setDatacenters(const TestConfig& testConfig) {
void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
// Using [0, 4) to disable the RocksDB storage engine.
// TODO: Figure out what is broken with the RocksDB engine in simulation.
int storage_engine_type = deterministicRandom()->randomInt(0, 4);
int storage_engine_type = deterministicRandom()->randomInt(0, 6);
if (testConfig.storageEngineType.present()) {
storage_engine_type = testConfig.storageEngineType.get();
} else {

@@ -1409,7 +1421,7 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
while (std::find(testConfig.storageEngineExcludeTypes.begin(),
testConfig.storageEngineExcludeTypes.end(),
storage_engine_type) != testConfig.storageEngineExcludeTypes.end()) {
storage_engine_type = deterministicRandom()->randomInt(0, 5);
storage_engine_type = deterministicRandom()->randomInt(0, 6);
}
}

@@ -1452,6 +1464,8 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
TraceEvent(SevWarnAlways, "RocksDBNonDeterminism")
.detail("Explanation", "The Sharded RocksDB storage engine is threaded and non-deterministic");
noUnseed = true;
auto& g_knobs = IKnobCollection::getMutableGlobalKnobCollection();
g_knobs.setKnob("shard_encode_location_metadata", KnobValueRef::create(bool{ true }));
break;
}
default:

@@ -2393,6 +2407,13 @@ ACTOR void setupAndRun(std::string dataFolder,
testConfig.readFromConfig(testFile);
g_simulator.hasDiffProtocolProcess = testConfig.startIncompatibleProcess;
g_simulator.setDiffProtocol = false;
if (testConfig.injectTargetedSSRestart && deterministicRandom()->random01() < 0.25) {
g_simulator.injectTargetedSSRestartTime = 60.0 + 340.0 * deterministicRandom()->random01();
}

if (testConfig.injectSSDelay && deterministicRandom()->random01() < 0.25) {
g_simulator.injectSSDelayTime = 60.0 + 240.0 * deterministicRandom()->random01();
}

// Build simulator allow list
allowList.addTrustedSubnet("0.0.0.0/2"sv);

@@ -2406,6 +2427,7 @@ ACTOR void setupAndRun(std::string dataFolder,
// https://github.com/apple/foundationdb/issues/5155
if (std::string_view(testFile).find("restarting") != std::string_view::npos) {
testConfig.storageEngineExcludeTypes.push_back(4);
testConfig.storageEngineExcludeTypes.push_back(5);

// Disable the default tenant in restarting tests for now
// TODO: persist the chosen default tenant in the restartInfo.ini file for the second test

@@ -2418,6 +2440,7 @@ ACTOR void setupAndRun(std::string dataFolder,
// Re-enable the backup and restore related simulation tests when the tests are passing again.
if (std::string_view(testFile).find("Backup") != std::string_view::npos) {
testConfig.storageEngineExcludeTypes.push_back(4);
testConfig.storageEngineExcludeTypes.push_back(5);
}

// Disable the default tenant in backup and DR tests for now. This is because backup does not currently duplicate

@@ -2432,6 +2455,7 @@ ACTOR void setupAndRun(std::string dataFolder,
// in the build.
if (!rocksDBEnabled) {
testConfig.storageEngineExcludeTypes.push_back(4);
testConfig.storageEngineExcludeTypes.push_back(5);
}

state ProtocolVersion protocolVersion = currentProtocolVersion;

@@ -742,6 +742,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
std::vector<std::pair<GrvProxyInterface, EventMap>> grvProxies,
std::vector<BlobWorkerInterface> blobWorkers,
ServerCoordinators coordinators,
std::vector<NetworkAddress> coordinatorAddresses,
Database cx,
Optional<DatabaseConfiguration> configuration,
Optional<Key> healthyZone,

@@ -839,8 +840,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
}
}

std::vector<NetworkAddress> addressVec = wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
for (const auto& coordinator : addressVec) {
for (const auto& coordinator : coordinatorAddresses) {
roles.addCoordinatorRole(coordinator);
}

@@ -2751,6 +2751,9 @@ ACTOR Future<JsonBuilderObject> lockedStatusFetcher(Reference<AsyncVar<ServerDBI
try {
wait(tr.onError(e));
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled)
throw;

incomplete_reasons->insert(format("Unable to determine if database is locked (%s).", e.what()));
break;
}

@@ -3041,6 +3044,7 @@ ACTOR Future<StatusReply> clusterGetStatus(

statusObj["machines"] = machineStatusFetcher(mMetrics, workers, configuration, &status_incomplete_reasons);

state std::vector<NetworkAddress> coordinatorAddresses;
if (configuration.present()) {
// Do the latency probe by itself to avoid interference from other status activities
state bool isAvailable = true;

@@ -3133,8 +3137,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
wait(success(primaryDCFO));

std::vector<NetworkAddress> coordinatorAddresses =
wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
std::vector<NetworkAddress> addresses =
wait(timeoutError(coordinators.ccr->getConnectionString().tryResolveHostnames(), 5.0));
coordinatorAddresses = std::move(addresses);

int logFaultTolerance = 100;
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
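Resolving coordinator hostnames can block on DNS, so the hunk above guards tryResolveHostnames() with timeoutError(), a flow combinator that forwards the inner future's result if it completes within the deadline and otherwise throws timed_out(); status then fails fast instead of hanging. A sketch of the general shape, with illustrative names:

    // Fails with timed_out() rather than blocking the status request forever.
    Future<std::vector<NetworkAddress>> resolved =
        timeoutError(connectionString.tryResolveHostnames(), 5.0);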
@@ -3275,6 +3280,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
grvProxies,
blobWorkers,
coordinators,
coordinatorAddresses,
cx,
configuration,
loadResult.present() ? loadResult.get().healthyZone : Optional<Key>(),

@@ -518,7 +518,8 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {

Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
std::vector<std::vector<Reference<TagData>>> tag_data; // tag.locality | tag.id
int unpoppedRecoveredTags;
int unpoppedRecoveredTagCount;
std::set<Tag> unpoppedRecoveredTags;
std::map<Tag, Promise<Void>> waitingTags;

Reference<TagData> getTagData(Tag tag) {

@@ -642,7 +643,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
std::string context)
: stopped(false), initialized(false), queueCommittingVersion(0), knownCommittedVersion(0),
durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), minPoppedTagVersion(0),
minPoppedTag(invalidTag), unpoppedRecoveredTags(0), cc("TLog", interf.id().toString()),
minPoppedTag(invalidTag), unpoppedRecoveredTagCount(0), cc("TLog", interf.id().toString()),
bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), blockingPeeks("BlockingPeeks", cc),
blockingPeekTimeouts("BlockingPeekTimeouts", cc), emptyPeeks("EmptyPeeks", cc),
nonEmptyPeeks("NonEmptyPeeks", cc), logId(interf.id()), protocolVersion(protocolVersion),

@@ -1196,14 +1197,20 @@ ACTOR Future<Void> tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen

if (tagData->unpoppedRecovered && upTo > logData->recoveredAt) {
tagData->unpoppedRecovered = false;
logData->unpoppedRecoveredTags--;
logData->unpoppedRecoveredTagCount--;
logData->unpoppedRecoveredTags.erase(tag);
TraceEvent("TLogPoppedTag", logData->logId)
.detail("Tags", logData->unpoppedRecoveredTags)
.detail("Tags", logData->unpoppedRecoveredTagCount)
.detail("Tag", tag.toString())
.detail("DurableKCVer", logData->durableKnownCommittedVersion)
.detail("RecoveredAt", logData->recoveredAt);
if (logData->unpoppedRecoveredTags == 0 && logData->durableKnownCommittedVersion >= logData->recoveredAt &&
logData->recoveryComplete.canBeSet()) {
.detail("RecoveredAt", logData->recoveredAt)
.detail("UnpoppedTags", describe(logData->unpoppedRecoveredTags));
if (logData->unpoppedRecoveredTagCount == 0 &&
logData->durableKnownCommittedVersion >= logData->recoveredAt && logData->recoveryComplete.canBeSet()) {
TraceEvent("TLogRecoveryComplete", logData->logId)
.detail("Tags", logData->unpoppedRecoveredTagCount)
.detail("DurableKCVer", logData->durableKnownCommittedVersion)
.detail("RecoveredAt", logData->recoveredAt);
logData->recoveryComplete.send(Void());
}
}

@@ -2153,10 +2160,10 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,
ASSERT(ver > logData->queueCommittedVersion.get());

logData->durableKnownCommittedVersion = knownCommittedVersion;
if (logData->unpoppedRecoveredTags == 0 && knownCommittedVersion >= logData->recoveredAt &&
if (logData->unpoppedRecoveredTagCount == 0 && knownCommittedVersion >= logData->recoveredAt &&
logData->recoveryComplete.canBeSet()) {
TraceEvent("TLogRecoveryComplete", logData->logId)
.detail("Tags", logData->unpoppedRecoveredTags)
.detail("Tags", logData->unpoppedRecoveredTagCount)
.detail("DurableKCVer", logData->durableKnownCommittedVersion)
.detail("RecoveredAt", logData->recoveredAt);
logData->recoveryComplete.send(Void());

@@ -3408,7 +3415,8 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
logData->queueCommittedVersion.set(logData->unrecoveredBefore - 1);
logData->version.set(logData->unrecoveredBefore - 1);

logData->unpoppedRecoveredTags = req.allTags.size();
logData->unpoppedRecoveredTagCount = req.allTags.size();
logData->unpoppedRecoveredTags = std::set<Tag>(req.allTags.begin(), req.allTags.end());
wait(initPersistentState(self, logData) || logData->removed);

TraceEvent("TLogRecover", self->dbgid)
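The TLog now tracks both a count and the concrete set of unpopped recovered tags: the set exists so TLogPoppedTag can report which tags are still outstanding via describe(), while the count keeps the hot-path comparisons cheap. The two are always updated together (decrement paired with erase, assignment paired with a rebuild), so the implied invariant, which a debug build could assert, is:

    // Implied by the paired updates in tLogPopCore and tLogStart:
    ASSERT(logData->unpoppedRecoveredTagCount == (int)logData->unpoppedRecoveredTags.size());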

@@ -69,6 +69,7 @@
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/pubsub.h"
#include "fdbserver/OnDemandStore.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ArgParseUtil.h"
#include "flow/DeterministicRandom.h"

@@ -111,7 +112,8 @@ enum {
OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_PRINT_CODE_PROBES, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_UNITTESTPARAM, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE,
OPT_METRICSPREFIX, OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE,
OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB, OPT_FAULT_INJECTION, OPT_PROFILER, OPT_PRINT_SIMTIME,
OPT_FLOW_PROCESS_NAME, OPT_FLOW_PROCESS_ENDPOINT, OPT_IP_TRUSTED_MASK, OPT_KMS_CONN_DISCOVERY_URL_FILE, OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT
OPT_FLOW_PROCESS_NAME, OPT_FLOW_PROCESS_ENDPOINT, OPT_IP_TRUSTED_MASK, OPT_KMS_CONN_DISCOVERY_URL_FILE, OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT,
OPT_NEW_CLUSTER_KEY
};

CSimpleOpt::SOption g_rgOptions[] = {

@@ -205,9 +207,11 @@ CSimpleOpt::SOption g_rgOptions[] = {
{ OPT_FLOW_PROCESS_NAME, "--process-name", SO_REQ_SEP },
{ OPT_FLOW_PROCESS_ENDPOINT, "--process-endpoint", SO_REQ_SEP },
{ OPT_IP_TRUSTED_MASK, "--trusted-subnet-", SO_REQ_SEP },
{ OPT_NEW_CLUSTER_KEY, "--new-cluster-key", SO_REQ_SEP },
{ OPT_KMS_CONN_DISCOVERY_URL_FILE, "--discover-kms-conn-url-file", SO_REQ_SEP},
{ OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, "--kms-conn-validation-token-details", SO_REQ_SEP},
{ OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT, "--kms-conn-get-encryption-keys-endpoint", SO_REQ_SEP},

TLS_OPTION_FLAGS,
SO_END_OF_OPTIONS
};

@@ -735,6 +739,17 @@ static void printUsage(const char* name, bool devhelp) {
" - FDB_DUMP_STARTKEY: start key for the dump, default is empty\n"
" - FDB_DUMP_ENDKEY: end key for the dump, default is \"\\xff\\xff\"\n"
" - FDB_DUMP_DEBUG: print key-values to stderr in escaped format\n");

printf(
"\n"
"The 'changedescription' role replaces the old cluster key in all coordinators' data files with the "
"specified new cluster key,\n"
"which is passed in by '--new-cluster-key'. In particular, a cluster key means '[description]:[id]'.\n"
"'--datadir' is supposed to point to the top level directory of FDB's data, where subdirectories are for "
"each process's data.\n"
"The given cluster file passed in by '-C, --cluster-file' is considered to contain the old cluster key.\n"
"It is used before restoring a snapshotted cluster to let the cluster have a different cluster key.\n"
"Please make sure to run it on every host in the cluster with the same '--new-cluster-key'.\n");
} else {
printOptionUsage("--dev-help", "Display developer-specific help and exit.");
}
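Taken together with the option table and role parsing below, the new role would be invoked roughly as follows; the paths and the key are illustrative only, and 'changeclusterkey' is the role string the parser accepts even though the help text above refers to it as the 'changedescription' role:

    fdbserver -r changeclusterkey -C /etc/foundationdb/fdb.cluster \
        --datadir /var/lib/foundationdb/data --new-cluster-key 'newdesc:newid'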
@@ -980,10 +995,12 @@ void restoreRoleFilesHelper(std::string dirSrc, std::string dirToMove, std::stri

namespace {
enum class ServerRole {
ChangeClusterKey,
ConsistencyCheck,
CreateTemplateDatabase,
DSLTest,
FDBD,
FlowProcess,
KVFileGenerateIOLogChecksums,
KVFileIntegrityCheck,
KVFileDump,

@@ -996,13 +1013,12 @@ enum class ServerRole {
SkipListTest,
Test,
VersionedMapTest,
UnitTests,
FlowProcess
UnitTests
};
struct CLIOptions {
std::string commandLine;
std::string fileSystemPath, dataFolder, connFile, seedConnFile, seedConnString, logFolder = ".", metricsConnFile,
metricsPrefix;
metricsPrefix, newClusterKey;
std::string logGroup = "default";
uint64_t rollsize = TRACE_DEFAULT_ROLL_SIZE;
uint64_t maxLogsSize = TRACE_DEFAULT_MAX_LOGS_SIZE;

@@ -1250,6 +1266,8 @@ private:
role = ServerRole::UnitTests;
else if (!strcmp(sRole, "flowprocess"))
role = ServerRole::FlowProcess;
else if (!strcmp(sRole, "changeclusterkey"))
role = ServerRole::ChangeClusterKey;
else {
fprintf(stderr, "ERROR: Unknown role `%s'\n", sRole);
printHelpTeaser(argv[0]);

@@ -1653,6 +1671,19 @@ private:
knobs.emplace_back("rest_kms_connector_get_encryption_keys_endpoint", args.OptionArg());
break;
}
case OPT_NEW_CLUSTER_KEY: {
newClusterKey = args.OptionArg();
try {
ClusterConnectionString ccs;
// make sure the new cluster key is in valid format
ccs.parseKey(newClusterKey);
} catch (Error& e) {
std::cerr << "Invalid cluster key(description:id) '" << newClusterKey << "' from --new-cluster-key"
<< std::endl;
flushAndExit(FDB_EXIT_ERROR);
}
break;
}
}
}

@@ -1748,6 +1779,21 @@ private:
flushAndExit(FDB_EXIT_ERROR);
}

if (role == ServerRole::ChangeClusterKey) {
bool error = false;
if (!newClusterKey.size()) {
fprintf(stderr, "ERROR: please specify --new-cluster-key\n");
error = true;
} else if (connectionFile->getConnectionString().clusterKey() == newClusterKey) {
fprintf(stderr, "ERROR: the new cluster key is the same as the old one\n");
error = true;
}
if (error) {
printHelpTeaser(argv[0]);
flushAndExit(FDB_EXIT_ERROR);
}
}

// Interpret legacy "maxLogs" option in the most sensible and unsurprising way we can while eliminating its code
// path
if (maxLogsSet) {

@@ -2272,6 +2318,11 @@ int main(int argc, char* argv[]) {
} else if (role == ServerRole::KVFileDump) {
f = stopAfter(KVFileDump(opts.kvFile));
g_network->run();
} else if (role == ServerRole::ChangeClusterKey) {
Key newClusterKey(opts.newClusterKey);
Key oldClusterKey = opts.connectionFile->getConnectionString().clusterKey();
f = stopAfter(coordChangeClusterKey(opts.dataFolder, newClusterKey, oldClusterKey));
g_network->run();
}

int rc = FDB_EXIT_SUCCESS;

@@ -51,6 +51,8 @@ bool compareFDBAndBlob(RangeResult fdb,
Version v,
bool debug);

void printGranuleChunks(const Standalone<VectorRef<BlobGranuleChunkRef>>& chunks);

ACTOR Future<Void> clearAndAwaitMerge(Database cx, KeyRange range);

#include "flow/unactorcompiler.h"

@@ -236,4 +236,9 @@ Future<Void> coordinationServer(std::string const& dataFolder,
Reference<ConfigNode> const&,
ConfigBroadcastInterface const&);

// Read a value of MovableValue and, if the old cluster key is present in it, update it to the new key
Optional<Value> updateCCSInMovableValue(ValueRef movableVal, KeyRef oldClusterKey, KeyRef newClusterKey);

Future<Void> coordChangeClusterKey(std::string dataFolder, KeyRef newClusterKey, KeyRef oldClusterKey);

#endif

@@ -39,6 +39,7 @@ enum class RelocateReason { INVALID = -1, OTHER, REBALANCE_DISK, REBALANCE_READ

// One-to-one relationship to the priority knobs
enum class DataMovementReason {
INVALID,
RECOVER_MOVE,
REBALANCE_UNDERUTILIZED_TEAM,
REBALANCE_OVERUTILIZED_TEAM,

@@ -60,6 +61,8 @@ enum class DataMovementReason {

struct DDShardInfo;

extern int dataMovementPriority(DataMovementReason moveReason);

// Represents a data move in DD.
struct DataMove {
DataMove() : meta(DataMoveMetaData()), restore(false), valid(false), cancelled(false) {}

@@ -89,9 +92,14 @@ struct RelocateShard {
std::shared_ptr<DataMove> dataMove; // Not null if this is a restored data move.
UID dataMoveId;
RelocateReason reason;
RelocateShard() : priority(0), cancelled(false), dataMoveId(anonymousShardId), reason(RelocateReason::INVALID) {}
RelocateShard(KeyRange const& keys, int priority, RelocateReason reason)
: keys(keys), priority(priority), cancelled(false), dataMoveId(anonymousShardId), reason(reason) {}
DataMovementReason moveReason;
RelocateShard()
: priority(0), cancelled(false), dataMoveId(anonymousShardId), reason(RelocateReason::INVALID),
moveReason(DataMovementReason::INVALID) {}
RelocateShard(KeyRange const& keys, DataMovementReason moveReason, RelocateReason reason)
: keys(keys), cancelled(false), dataMoveId(anonymousShardId), reason(reason), moveReason(moveReason) {
priority = dataMovementPriority(moveReason);
}

bool isRestore() const { return this->dataMove != nullptr; }
};
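With this change a RelocateShard is constructed from a DataMovementReason and derives its priority through dataMovementPriority(), instead of taking a raw knob value; the DataDistributionTracker hunks at the top of this diff show the call sites switching over. A small before/after sketch using the merge case already present in this diff:

    // Before: priority passed explicitly from a knob.
    self->output.send(RelocateShard(mergeRange, SERVER_KNOBS->PRIORITY_MERGE_SHARD, RelocateReason::OTHER));

    // After: the reason is the source of truth; the constructor computes priority.
    RelocateShard rs(mergeRange, DataMovementReason::MERGE_SHARD, RelocateReason::OTHER);
    ASSERT(rs.priority == dataMovementPriority(DataMovementReason::MERGE_SHARD));
    self->output.send(rs);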
@@ -286,6 +294,7 @@ class ShardsAffectedByTeamFailure : public ReferenceCounted<ShardsAffectedByTeam
public:
ShardsAffectedByTeamFailure() {}

enum class CheckMode { Normal = 0, ForceCheck, ForceNoCheck };
struct Team {
std::vector<UID> servers; // sorted
bool primary;

@@ -335,6 +344,8 @@ public:
void finishMove(KeyRangeRef keys);
void check() const;

void setCheckMode(CheckMode);

PromiseStream<KeyRange> restartShardTracker;

private:

@@ -348,6 +359,7 @@ private:
}
};

CheckMode checkMode = CheckMode::Normal;
KeyRangeMap<std::pair<std::vector<Team>, std::vector<Team>>>
shard_teams; // A shard can be affected by the failure of multiple teams if it is a queued merge, or when
// usable_regions > 1

@@ -294,8 +294,8 @@ Future<Void> bulkSetup(Database cx,
// Here we wait for data in flight to go to 0 (this will not work on a database with other users)
if (postSetupWarming != 0) {
try {
wait(delay(5.0) >>
waitForLowInFlight(cx, workload)); // Wait for the data distribution in a small test to start
wait(delay(5.0));
wait(waitForLowInFlight(cx, workload)); // Wait for the data distribution in a small test to start
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled)
throw;

File diff suppressed because it is too large

@@ -385,7 +385,9 @@ ACTOR Future<Reference<TestWorkload>> getWorkloadIface(WorkloadRequest work,
wcx.sharedRandomNumber = work.sharedRandomNumber;

workload = IWorkloadFactory::create(testName.toString(), wcx);
wait(workload->initialized());
if (workload) {
wait(workload->initialized());
}

auto unconsumedOptions = checkAllOptionsConsumed(workload ? workload->options : VectorRef<KeyValueRef>());
if (!workload || unconsumedOptions.size()) {

@@ -237,57 +237,64 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
while (timeTravelIt != timeTravelChecks.end() && currentTime >= timeTravelIt->first) {
state OldRead oldRead = timeTravelIt->second;
timeTravelChecksMemory -= oldRead.oldResult.expectedSize();
// advance iterator before doing read, so if it gets error we don't retry it
timeTravelIt = timeTravelChecks.erase(timeTravelIt);
if (prevPurgeVersion == -1) {
prevPurgeVersion = oldRead.v;
}
// advance iterator before doing read, so if it gets error we don't retry it

try {
state Version newPurgeVersion = 0;
state bool doPurging = allowPurging && deterministicRandom()->random01() < 0.5;
if (doPurging) {
Version maxPurgeVersion = oldRead.v;
for (auto& it : timeTravelChecks) {
maxPurgeVersion = std::min(it.second.v, maxPurgeVersion);
}
if (prevPurgeVersion < maxPurgeVersion) {
newPurgeVersion = deterministicRandom()->randomInt64(prevPurgeVersion, maxPurgeVersion);
prevPurgeVersion = std::max(prevPurgeVersion, newPurgeVersion);
Key purgeKey = wait(cx->purgeBlobGranules(normalKeys, newPurgeVersion, {}, false));
wait(cx->waitPurgeGranulesComplete(purgeKey));
self->purges++;
} else {
doPurging = false;
}
// before doing read, purge just before read version
state Version newPurgeVersion = 0;
state bool doPurging = allowPurging && deterministicRandom()->random01() < 0.5;
if (doPurging) {
CODE_PROBE(true, "BGV considering purge");
Version maxPurgeVersion = oldRead.v;
for (auto& it : timeTravelChecks) {
maxPurgeVersion = std::min(it.second.v, maxPurgeVersion);
}
if (prevPurgeVersion < maxPurgeVersion) {
CODE_PROBE(true, "BGV doing purge");
newPurgeVersion = deterministicRandom()->randomInt64(prevPurgeVersion, maxPurgeVersion);
prevPurgeVersion = std::max(prevPurgeVersion, newPurgeVersion);
if (BGV_DEBUG) {
fmt::print("BGV Purging @ {0}\n", newPurgeVersion);
}
try {
Key purgeKey = wait(cx->purgeBlobGranules(normalKeys, newPurgeVersion, {}, false));
if (BGV_DEBUG) {
fmt::print("BGV Purged @ {0}, waiting\n", newPurgeVersion);
}
wait(cx->waitPurgeGranulesComplete(purgeKey));
} catch (Error& e) {
if (e.code() == error_code_operation_cancelled) {
throw e;
}
// purging shouldn't error, it should retry.
if (BGV_DEBUG) {
fmt::print("Unexpected error {0} purging @ {1}!\n", e.name(), newPurgeVersion);
}
ASSERT(false);
}
CODE_PROBE(true, "BGV purge complete");
if (BGV_DEBUG) {
fmt::print("BGV Purge complete @ {0}\n", newPurgeVersion);
}
self->purges++;
} else {
doPurging = false;
}
}

// do time travel read
try {
std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> reReadResult =
wait(readFromBlob(cx, self->bstore, oldRead.range, 0, oldRead.v));
if (!compareFDBAndBlob(oldRead.oldResult, reReadResult, oldRead.range, oldRead.v, BGV_DEBUG)) {
self->mismatches++;
}
self->timeTravelReads++;

if (doPurging) {
wait(self->killBlobWorkers(cx, self));
std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> versionRead =
wait(readFromBlob(cx, self->bstore, oldRead.range, 0, prevPurgeVersion));
try {
Version minSnapshotVersion = newPurgeVersion;
for (auto& it : versionRead.second) {
minSnapshotVersion = std::min(minSnapshotVersion, it.snapshotVersion);
}
std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> versionRead =
wait(readFromBlob(cx, self->bstore, oldRead.range, 0, minSnapshotVersion - 1));
ASSERT(false);
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled) {
throw;
}
ASSERT(e.code() == error_code_blob_granule_transaction_too_old);
}
}
} catch (Error& e) {
fmt::print("Error TT: {0}\n", e.name());
if (e.code() == error_code_blob_granule_transaction_too_old) {
self->timeTravelTooOld++;
// TODO: add debugging info for when this is a failure
@ -297,6 +304,51 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
oldRead.v);
}
}

// if purged just before read, verify that purge cleaned up data by restarting blob workers and
// reading older than the purge version
if (doPurging) {
wait(self->killBlobWorkers(cx, self));
if (BGV_DEBUG) {
fmt::print("BGV Reading post-purge [{0} - {1}) @ {2}\n",
oldRead.range.begin.printable(),
oldRead.range.end.printable(),
prevPurgeVersion);
}
// ensure purge version exactly is still readable
std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> versionRead1 =
wait(readFromBlob(cx, self->bstore, oldRead.range, 0, prevPurgeVersion));
if (BGV_DEBUG) {
fmt::print("BGV Post-purge first read:\n");
printGranuleChunks(versionRead1.second);
}
try {
// read at purgeVersion - 1, should NOT be readable
Version minSnapshotVersion = newPurgeVersion;
for (auto& it : versionRead1.second) {
minSnapshotVersion = std::min(minSnapshotVersion, it.snapshotVersion);
}
if (BGV_DEBUG) {
fmt::print("BGV Reading post-purge again [{0} - {1}) @ {2}\n",
oldRead.range.begin.printable(),
oldRead.range.end.printable(),
minSnapshotVersion - 1);
}
std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> versionRead2 =
wait(readFromBlob(cx, self->bstore, oldRead.range, 0, minSnapshotVersion - 1));
if (BGV_DEBUG) {
fmt::print("BGV ERROR: data not purged! Read successful!!\n");
printGranuleChunks(versionRead2.second);
}
ASSERT(false);
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled) {
throw;
}
ASSERT(e.code() == error_code_blob_granule_transaction_too_old);
CODE_PROBE(true, "BGV verified too old after purge");
}
}
}

// pick a random range

@ -471,6 +523,8 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
// For some reason simulation is still passing when this fails?.. so assert for now
ASSERT(result);

// FIXME: if doPurging was set, possibly do one last purge here, and verify it succeeds with no errors

if (self->clientId == 0 && SERVER_KNOBS->BG_ENABLE_MERGING && deterministicRandom()->random01() < 0.1) {
CODE_PROBE(true, "BGV clearing database and awaiting merge");
wait(clearAndAwaitMerge(cx, normalKeys));

@ -0,0 +1,767 @@
/*
* ChangeFeedOperations.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/SystemData.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "flow/Arena.h"
#include "flow/IRandom.h"
#include "flow/Trace.h"
#include "flow/Util.h"
#include "flow/serialize.h"
#include <cstring>
#include <limits>

#include "flow/actorcompiler.h" // This must be the last #include.

// enable to debug specific operations for a given change feed
#define DEBUG_KEY ""_sr

#define DEBUG_CF(feedKey) (feedKey.printable() == DEBUG_KEY)

ACTOR Future<Void> doPop(Database cx, Key key, Key feedID, Version version, Version* doneOut) {
wait(cx->popChangeFeedMutations(feedID, version));
if (*doneOut < version) {
*doneOut = version;
}
if (DEBUG_CF(key)) {
fmt::print("DBG) {0} Popped through {1}\n", key.printable(), version);
}
// TODO: could strengthen pop checking by validating that a read immediately after the pop completes has no data
return Void();
}

struct FeedTestData : ReferenceCounted<FeedTestData>, NonCopyable {
Key key;
KeyRange keyRange;
Key feedID;
int nextVal;
Future<Void> liveReader;
bool lastCleared = false;

std::vector<Future<Void>> pops;
Version poppingVersion;
Version poppedVersion;
Optional<Version> stopVersion;
bool destroying;
bool destroyed;
bool complete;

int popWindow;
int popDelayWindow;

std::deque<std::pair<Version, Optional<Value>>> writesByVersion;

// these were all committed
std::deque<std::pair<Version, Optional<Value>>> pendingCheck;
NotifiedVersion checkVersion;

FeedTestData(Key key, bool doPops)
: key(key), keyRange(KeyRangeRef(key, keyAfter(key))), feedID(key.withPrefix(LiteralStringRef("CF"))), nextVal(0),
lastCleared(false), poppingVersion(0), poppedVersion(0), destroying(false), destroyed(false), complete(false),
checkVersion(0) {
if (doPops) {
popWindow = deterministicRandom()->randomExp(1, 8);
popDelayWindow = deterministicRandom()->randomInt(0, 2) * deterministicRandom()->randomExp(1, 4);
} else {
popWindow = -1;
popDelayWindow = -1;
}
}

Value nextValue() {
std::string v = std::to_string(nextVal);
nextVal++;
return Value(v);
}

void update(Version version, Optional<Value> value) {
if (!stopVersion.present()) {
// if feed is stopped, value should not get read
writesByVersion.push_back({ version, value });
pendingCheck.push_back(writesByVersion.back());
checkVersion.set(version);
}
}

void testComplete() {
complete = true;
checkVersion.set(checkVersion.get() + 1);
}

void pop(Database cx, Version v) {
if (DEBUG_CF(key)) {
fmt::print("DBG) {0} Popping through {1}\n", key.printable(), v);
}
ASSERT(poppingVersion < v);
poppingVersion = v;
while (!writesByVersion.empty() && v > writesByVersion.front().first) {
writesByVersion.pop_front();
}
while (!pendingCheck.empty() && v > pendingCheck.front().first) {
pendingCheck.pop_front();
}
pops.push_back(doPop(cx, key, feedID, v, &poppedVersion));
}
};

static void rollbackFeed(Key key,
std::deque<Standalone<MutationsAndVersionRef>>& buffered,
Version version,
MutationRef rollbackMutation) {
Version rollbackVersion;
BinaryReader br(rollbackMutation.param2, Unversioned());
br >> rollbackVersion;
TraceEvent("ChangeFeedRollback").detail("Key", key).detail("Ver", version).detail("RollbackVer", rollbackVersion);
if (DEBUG_CF(key)) {
fmt::print("DBG) {0} Rolling back {1} -> {2}\n", key.printable(), version, rollbackVersion);
}
while (!buffered.empty() && buffered.back().version > rollbackVersion) {
TraceEvent("ChangeFeedRollbackVer").detail("Ver", buffered.back().version);
buffered.pop_back();
}
}

static void checkNextResult(Key key,
std::deque<Standalone<MutationsAndVersionRef>>& buffered,
std::deque<std::pair<Version, Optional<Value>>>& checkData) {
// First asserts are checking data is in the form the test is supposed to produce
ASSERT(!buffered.empty());
ASSERT(buffered.front().mutations.size() == 1);
ASSERT(buffered.front().mutations[0].param1 == key);

// Below asserts are correctness of change feed invariants.

// Handle case where txn retried and wrote same value twice. checkData's version is the committed one, so the same
// update may appear at an earlier version. This is fine, as long as it then actually appears at the committed
// version
// TODO: could strengthen this check a bit and only allow it to appear at the lower version if the txn retried on
// commit_unknown_result?
if (checkData.front().first < buffered.front().version) {
fmt::print("ERROR. {0} Check version {1} != {2}.\n Check: {3} {4}\n Buffered: {5} {6}\n",
key.printable(),
checkData.front().first,
buffered.front().version,
checkData.front().second.present() ? "SET" : "CLEAR",
checkData.front().second.present() ? checkData.front().second.get().printable()
: keyAfter(key).printable(),
buffered.front().mutations[0].type == MutationRef::SetValue ? "SET" : "CLEAR",
buffered.front().mutations[0].param2.printable());
}
ASSERT(checkData.front().first >= buffered.front().version);

if (checkData.front().second.present()) {
ASSERT(buffered.front().mutations[0].type == MutationRef::SetValue);
ASSERT(buffered.front().mutations[0].param2 == checkData.front().second.get());
} else {
ASSERT(buffered.front().mutations[0].type == MutationRef::ClearRange);
ASSERT(buffered.front().mutations[0].param2 == keyAfter(key));
}

if (checkData.front().first == buffered.front().version) {
checkData.pop_front();
}
buffered.pop_front();
}

ACTOR Future<Void> liveReader(Database cx, Reference<FeedTestData> data, Version begin) {
state Version lastCheckVersion = 0;
state Version nextCheckVersion = 0;
state std::deque<Standalone<MutationsAndVersionRef>> buffered;
state Reference<ChangeFeedData> results = makeReference<ChangeFeedData>();
state Future<Void> stream =
cx->getChangeFeedStream(results, data->feedID, begin, std::numeric_limits<Version>::max(), data->keyRange);
try {
loop {
if (data->complete && data->pendingCheck.empty()) {
return Void();
}
nextCheckVersion = data->pendingCheck.empty() ? invalidVersion : data->pendingCheck.front().first;
choose {
when(Standalone<VectorRef<MutationsAndVersionRef>> res = waitNext(results->mutations.getFuture())) {
for (auto& it : res) {
if (it.mutations.size() == 1 && it.mutations.back().param1 == lastEpochEndPrivateKey) {
rollbackFeed(data->key, buffered, it.version, it.mutations.back());
} else {
if (it.mutations.size() == 0) {
// FIXME: THIS SHOULD NOT HAPPEN
// FIXME: these are also getting sent past stopVersion!!
} else {
if (data->stopVersion.present()) {
if (it.version > data->stopVersion.get()) {
fmt::print("DBG) {0} Read data with version {1} > stop version {2} ({3})\n",
data->key.printable(),
it.version,
data->stopVersion.get(),
it.mutations.size());
}
ASSERT(it.version <= data->stopVersion.get());
}
buffered.push_back(Standalone<MutationsAndVersionRef>(it));
if (DEBUG_CF(data->key)) {
fmt::print("DBG) {0} Live read through {1} ({2})\n",
data->key.printable(),
it.version,
it.mutations.size());
}
}
}
}
}
when(wait(data->checkVersion.whenAtLeast(lastCheckVersion + 1))) {
// wake loop and start new whenAtLeast whenever checkVersion is set
lastCheckVersion = data->checkVersion.get();
}
when(wait(data->pendingCheck.empty() ? Never()
: results->whenAtLeast(data->pendingCheck.front().first))) {

if (data->pendingCheck.empty() || data->pendingCheck.front().first > nextCheckVersion) {
// pendingCheck wasn't empty before whenAtLeast, and nextCheckVersion = the front version, so if
// either of these are true, the data was popped concurrently and we can move on to checking the
// next value
CODE_PROBE(true, "popped while waiting for whenAtLeast to check next value");
continue;
}
while (!buffered.empty() && buffered.front().version < data->poppingVersion) {
CODE_PROBE(true, "live reader ignoring data that is being popped");
buffered.pop_front();
}
if (buffered.empty()) {
if (data->poppingVersion < data->pendingCheck.front().first) {
fmt::print("DBG) {0} Buffered empty after ready for check, and data not popped! popped "
"{1}, popping {2}, check {3}\n",
data->key.printable(),
data->poppedVersion,
data->poppingVersion,
data->pendingCheck.front().first);
}
ASSERT(data->poppingVersion >= data->pendingCheck.front().first);
data->pendingCheck.pop_front();
} else {
Version v = buffered.front().version;
if (DEBUG_CF(data->key)) {
fmt::print("DBG) {0} Live checking through {1}\n",
data->key.printable(),
data->pendingCheck.front().first);
}
checkNextResult(data->key, buffered, data->pendingCheck);
if (DEBUG_CF(data->key)) {
fmt::print("DBG) {0} Live Checked through {1}\n", data->key.printable(), v);
}

if (data->popDelayWindow >= 0 && data->popWindow >= 0 &&
data->writesByVersion.size() == data->popWindow + data->popDelayWindow) {
data->pop(cx, data->writesByVersion[data->popWindow - 1].first + 1);
ASSERT(data->writesByVersion.size() == data->popDelayWindow);
}
}
}
}
}
} catch (Error& e) {
throw e;
}
}

ACTOR Future<Void> historicReader(Database cx,
Reference<FeedTestData> data,
Version begin,
Version end,
bool skipPopped) {
state std::deque<std::pair<Version, Optional<Value>>> checkData;
state std::deque<Standalone<MutationsAndVersionRef>> buffered;
state Reference<ChangeFeedData> results = makeReference<ChangeFeedData>();
state Future<Void> stream = cx->getChangeFeedStream(results, data->feedID, begin, end, data->keyRange);
state Version poppedVersionAtStart = data->poppedVersion;

if (DEBUG_CF(data->key)) {
fmt::print("DBG) {0} Starting historical read {1} - {2}\n", data->key.printable(), begin, end);
}

// TODO could cpu optimize this
for (auto& it : data->writesByVersion) {
if (it.first >= end) {
break;
}
if (it.first >= begin) {
checkData.push_back(it);
}
}

try {
loop {
Standalone<VectorRef<MutationsAndVersionRef>> res = waitNext(results->mutations.getFuture());
for (auto& it : res) {
if (it.mutations.size() == 1 && it.mutations.back().param1 == lastEpochEndPrivateKey) {
rollbackFeed(data->key, buffered, it.version, it.mutations.back());
} else {
if (it.mutations.size() == 0) {
// FIXME: THIS SHOULD NOT HAPPEN
// FIXME: these are also getting sent past stopVersion!!
} else {
if (data->stopVersion.present()) {
ASSERT(it.version <= data->stopVersion.get());
}
buffered.push_back(Standalone<MutationsAndVersionRef>(it));
}
}
}
}
} catch (Error& e) {
if (e.code() != error_code_end_of_stream) {
throw;
}
}

if (skipPopped) {
while (!buffered.empty() && buffered.front().version < data->poppingVersion) {
// ignore data
buffered.pop_front();
}
while (!checkData.empty() && checkData.front().first < data->poppingVersion) {
checkData.pop_front();
}
}

while (!checkData.empty() && !buffered.empty()) {
checkNextResult(data->key, buffered, checkData);
}
// Change feed missing data it should have
ASSERT(checkData.empty());
// Change feed read extra data it shouldn't have
ASSERT(buffered.empty());

// check pop version of cursor
// TODO: this check might not always work if read is for old data and SS is way behind
// FIXME: this check doesn't work for now, probably due to above comment
/*if (data->poppingVersion != 0) {
ASSERT(results->popVersion >= poppedVersionAtStart && results->popVersion <= data->poppingVersion);
}*/

return Void();
}

enum Op {
CREATE_DELETE = 0,
READ = 1,
UPDATE_CLEAR = 2,
STOP = 3,
POP = 4,
OP_COUNT = 5 /* keep this last */
};

struct ChangeFeedOperationsWorkload : TestWorkload {
// test settings
double testDuration;
int operationsPerSecond;
int targetFeeds;
bool clientsDisjointKeyspace;
bool clearKeyWhenDestroy;
double clearFrequency;
int popMode;

int opWeights[Op::OP_COUNT];
int totalOpWeight;

Future<Void> client;
std::unordered_set<Key> usedKeys;
std::vector<Reference<FeedTestData>> data;

ChangeFeedOperationsWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
testDuration = getOption(options, "testDuration"_sr, 60.0);
operationsPerSecond = getOption(options, "opsPerSecond"_sr, 100.0);
int64_t rand = wcx.sharedRandomNumber;
targetFeeds = deterministicRandom()->randomExp(1, 1 + rand % 10);
targetFeeds *= (0.8 + (deterministicRandom()->random01() * 0.4));
targetFeeds = std::max(1, targetFeeds / clientCount);
rand /= 10;
clientsDisjointKeyspace = rand % 2;
rand /= 2;
clearKeyWhenDestroy = rand % 2;
rand /= 2;
bool doStops = rand % 2;
rand /= 2;
bool noCreateDelete = rand % 10 == 0;
rand /= 10;
popMode = rand % 3; // 0=none, 1=read-driven, 2=op-driven
rand /= 3;

ASSERT(clientId >= 0);
ASSERT(clientId < clientCount);
ASSERT(clientCount < 255);

clearFrequency = deterministicRandom()->random01();

for (int i = 0; i < Op::OP_COUNT; i++) {
int randWeight = deterministicRandom()->randomExp(0, 5);
ASSERT(randWeight > 0);
opWeights[i] = randWeight;
}

if (!doStops) {
opWeights[Op::STOP] = 0;
}
if (noCreateDelete) {
opWeights[Op::CREATE_DELETE] = 0;
}
if (popMode != 2) {
opWeights[Op::POP] = 0;
}

std::string weightString = "|";
totalOpWeight = 0;
for (int i = 0; i < Op::OP_COUNT; i++) {
totalOpWeight += opWeights[i];
weightString += std::to_string(opWeights[i]) + "|";
}

TraceEvent("ChangeFeedOperationsInit")
.detail("TargetFeeds", targetFeeds)
.detail("DisjointKeyspace", clientsDisjointKeyspace)
.detail("ClearWhenDestroy", clearKeyWhenDestroy)
.detail("DoStops", doStops)
.detail("NoCreateDelete", noCreateDelete)
.detail("Weights", weightString);
}

Key unusedNewRandomKey() {
while (true) {
Key k = newRandomKey();
if (usedKeys.insert(k).second) {
return k;
}
}
}

Key newRandomKey() {
if (clientsDisjointKeyspace) {
double keyspaceRange = (1.0 / clientCount);
double randPartOfRange = deterministicRandom()->random01() * (keyspaceRange - 0.0001);
double randomDouble = clientId * keyspaceRange + 0.0001 + randPartOfRange;
return doubleToTestKey(randomDouble);
} else {
// this is kinda hacky but it guarantees disjoint keys per client
Key ret = doubleToTestKey(deterministicRandom()->random01());
std::string str = ret.toString();
str.back() = (uint8_t)clientId;
return Key(str);
}
}

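The disjoint-keyspace path above gives each client its own 1/clientCount slice of the unit interval before mapping the draw to a key, with a small epsilon keeping clients off the slice boundaries. A minimal standalone sketch of the same arithmetic (hypothetical helper, with std::mt19937 standing in for deterministicRandom()):

#include <random>

// Hypothetical helper mirroring the disjoint-keyspace logic above: client i
// of n draws only from slice [i/n, (i+1)/n) of the unit interval, nudged off
// the lower boundary by a small epsilon so slices cannot collide.
double randomKeyFraction(int clientId, int clientCount, std::mt19937& rng) {
	std::uniform_real_distribution<double> dist(0.0, 1.0);
	double slice = 1.0 / clientCount;             // width of one client's slice
	double offset = dist(rng) * (slice - 0.0001); // stay strictly inside the slice
	return clientId * slice + 0.0001 + offset;    // e.g. client 2 of 4 -> [0.5001, 0.75)
}
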
// Pick op with weighted average
Op pickRandomOp() {
int r = deterministicRandom()->randomInt(0, totalOpWeight);
int i = 0;
while (i < Op::OP_COUNT && (opWeights[i] <= r || opWeights[i] == 0)) {
r -= opWeights[i];
i++;
}
ASSERT(i < Op::OP_COUNT);
return (Op)i;
}

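pickRandomOp draws one integer in [0, totalOpWeight) and walks the weight buckets, subtracting each weight until the draw lands inside a bucket; disabled ops carry weight 0 and can never be hit. The same cumulative scan as a standalone sketch (hypothetical, using <random> in place of deterministicRandom()):

#include <cassert>
#include <random>
#include <vector>

// Hypothetical sketch of weighted selection: returns index i with
// probability weights[i] / sum(weights). Assumes the total weight is > 0.
int pickWeighted(const std::vector<int>& weights, std::mt19937& rng) {
	int total = 0;
	for (int w : weights)
		total += w;
	std::uniform_int_distribution<int> dist(0, total - 1);
	int r = dist(rng); // uniform draw in [0, total)
	for (size_t i = 0; i < weights.size(); i++) {
		if (r < weights[i])
			return (int)i; // the draw landed in bucket i; zero-weight buckets are skipped
		r -= weights[i];
	}
	assert(false); // unreachable while total > 0
	return -1;
}
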
ACTOR Future<Void> createNewFeed(Database cx, ChangeFeedOperationsWorkload* self) {
state Transaction tr(cx);
state Key key = self->unusedNewRandomKey();
state Reference<FeedTestData> feedData = makeReference<FeedTestData>(key, self->popMode == 1);
state Value initialValue = feedData->nextValue();

if (DEBUG_CF(key)) {
fmt::print("DBG) Creating {0}\n", key.printable());
}

loop {
try {
tr.set(key, initialValue);
wait(updateChangeFeed(&tr, feedData->feedID, ChangeFeedStatus::CHANGE_FEED_CREATE, feedData->keyRange));
wait(tr.commit());

Version createVersion = tr.getCommittedVersion();
if (DEBUG_CF(key)) {
fmt::print("DBG) Created {0} @ {1}\n", key.printable(), createVersion);
}
feedData->update(createVersion, initialValue);
feedData->liveReader = liveReader(cx, feedData, createVersion);

self->data.push_back(feedData);

return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

std::string description() const override { return "ChangeFeedOperationsWorkload"; }
Future<Void> setup(Database const& cx) override { return _setup(cx, this); }

ACTOR Future<Void> _setup(Database cx, ChangeFeedOperationsWorkload* self) {
// create initial targetFeeds feeds
TraceEvent("ChangeFeedOperationsSetup").detail("InitialFeeds", self->targetFeeds).log();
state int i;
for (i = 0; i < self->targetFeeds; i++) {
wait(self->createNewFeed(cx, self));
}
TraceEvent("ChangeFeedOperationsSetupComplete");
return Void();
}

Future<Void> start(Database const& cx) override {
client = changeFeedOperationsClient(cx->clone(), this);
return delay(testDuration);
}
Future<bool> check(Database const& cx) override {
client = Future<Void>();
return _check(cx, this);
}

ACTOR Future<Void> checkFeed(Database cx, ChangeFeedOperationsWorkload* self, Reference<FeedTestData> feedData) {
state int popIdx;
feedData->testComplete();

if (DEBUG_CF(feedData->key)) {
fmt::print("Final check {0} waiting on live reader\n", feedData->key.printable());
}
// wait on live reader and pops to make sure they complete without error
wait(feedData->liveReader);
if (DEBUG_CF(feedData->key)) {
fmt::print("Final check {0} waiting on {1} pops\n", feedData->key.printable(), feedData->pops.size());
}
for (popIdx = 0; popIdx < feedData->pops.size(); popIdx++) {
wait(feedData->pops[popIdx]);
}

// do final check, read everything not popped
if (DEBUG_CF(feedData->key)) {
fmt::print("Final check {0} waiting on data check\n", feedData->key.printable(), feedData->pops.size());
}
wait(self->doRead(cx, feedData, feedData->writesByVersion.size()));

// ensure reading [0, poppedVersion) returns no results
if (feedData->poppedVersion > 0) {
if (DEBUG_CF(feedData->key)) {
fmt::print(
"Final check {0} waiting on read popped check\n", feedData->key.printable(), feedData->pops.size());
}
// FIXME: re-enable checking for popped data by changing skipPopped back to false!
wait(historicReader(cx, feedData, 0, feedData->poppedVersion, true));
}

return Void();
}

ACTOR Future<bool> _check(Database cx, ChangeFeedOperationsWorkload* self) {
TraceEvent("ChangeFeedOperationsCheck").detail("FeedCount", self->data.size()).log();
fmt::print("Checking {0} feeds\n", self->data.size()); // TODO REMOVE
state std::vector<Future<Void>> feedChecks;
for (int i = 0; i < self->data.size(); i++) {
if (self->data[i]->destroying) {
continue;
}
if (DEBUG_CF(self->data[i]->key)) {
fmt::print("Final check {0}\n", self->data[i]->key.printable());
}
feedChecks.push_back(self->checkFeed(cx, self, self->data[i]));
}
wait(waitForAll(feedChecks));
// FIXME: check that all destroyed feeds are actually destroyed?
TraceEvent("ChangeFeedOperationsCheckComplete");
return true;
}

void getMetrics(std::vector<PerfMetric>& m) override {}

ACTOR Future<Void> stopFeed(Database cx, Reference<FeedTestData> feedData) {
state Transaction tr(cx);
if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Stopping\n", feedData->key.printable());
}
loop {
try {
wait(updateChangeFeed(&tr, feedData->feedID, ChangeFeedStatus::CHANGE_FEED_STOP, feedData->keyRange));
wait(tr.commit());

Version stopVersion = tr.getCommittedVersion();
if (!feedData->stopVersion.present()) {
feedData->stopVersion = stopVersion;
}
if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Stopped @ {1}\n", feedData->key.printable(), stopVersion);
}
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

void popFeed(Database cx, Reference<FeedTestData> feedData) {
if (!feedData->writesByVersion.empty()) {
feedData->pop(cx, feedData->writesByVersion.front().first + 1);
}
}

ACTOR Future<Void> destroyFeed(Database cx, ChangeFeedOperationsWorkload* self, int feedIdx) {
state Reference<FeedTestData> feedData = self->data[feedIdx];
state Transaction tr(cx);
feedData->destroying = true;
if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Destroying\n", feedData->key.printable());
}
loop {
try {
wait(
updateChangeFeed(&tr, feedData->feedID, ChangeFeedStatus::CHANGE_FEED_DESTROY, feedData->keyRange));
if (self->clearKeyWhenDestroy) {
tr.clear(feedData->key);
}
wait(tr.commit());

feedData->destroyed = true;
// remove feed from list
ASSERT(self->data[feedIdx]->key == feedData->key);
swapAndPop(&self->data, feedIdx);
if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Destroyed @ {1}\n", feedData->key.printable(), tr.getCommittedVersion());
}
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

ACTOR Future<Void> doRead(Database cx, Reference<FeedTestData> feedData, int targetReadWidth) {
if (feedData->writesByVersion.empty()) {
return Void();
}
Version beginVersion;
Version endVersion;
if (targetReadWidth >= feedData->writesByVersion.size()) {
beginVersion = feedData->writesByVersion.front().first;
endVersion = feedData->writesByVersion.back().first + 1;
} else {
// either up to or including end
int randStart = deterministicRandom()->randomInt(0, feedData->writesByVersion.size() - targetReadWidth);
beginVersion = feedData->writesByVersion[randStart].first;
int end = randStart + targetReadWidth;
if (end == feedData->writesByVersion.size()) {
endVersion = feedData->writesByVersion.back().first + 1;
} else {
// Make sure last included value (end version -1) is a committed version for checking
endVersion = feedData->writesByVersion[end].first + 1;
}
}

if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Reading @ {1} - {2}\n", feedData->key.printable(), beginVersion, endVersion);
}

// FIXME: this sometimes reads popped data!
wait(historicReader(cx, feedData, beginVersion, endVersion, true));

if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Read complete\n", feedData->key.printable());
}

return Void();
}

ACTOR Future<Void> doUpdateClear(Database cx,
ChangeFeedOperationsWorkload* self,
Reference<FeedTestData> feedData) {
state Transaction tr(cx);
state Optional<Value> updateValue;

// if value is already not set, don't do a clear, otherwise pick either
if (feedData->lastCleared || deterministicRandom()->random01() > self->clearFrequency) {
updateValue = feedData->nextValue();
if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Setting {1}\n", feedData->key.printable(), updateValue.get().printable());
}
} else if (DEBUG_CF(feedData->key)) {
fmt::print("DBG) {0} Clearing\n", feedData->key.printable());
}
loop {
try {
if (updateValue.present()) {
tr.set(feedData->key, updateValue.get());
} else {
tr.clear(feedData->key);
}

wait(tr.commit());

Version writtenVersion = tr.getCommittedVersion();

if (DEBUG_CF(feedData->key) && updateValue.present()) {
fmt::print("DBG) {0} Set {1} @ {2}\n",
feedData->key.printable(),
updateValue.get().printable(),
writtenVersion);
}
if (DEBUG_CF(feedData->key) && !updateValue.present()) {
fmt::print("DBG) {0} Cleared @ {1}\n", feedData->key.printable(), writtenVersion);
}

feedData->update(writtenVersion, updateValue);
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

ACTOR Future<Void> changeFeedOperationsClient(Database cx, ChangeFeedOperationsWorkload* self) {
state double last = now();
loop {
state Future<Void> waitNextOp = poisson(&last, 1.0 / self->operationsPerSecond);
Op op = self->pickRandomOp();
int feedIdx = deterministicRandom()->randomInt(0, self->data.size());
if (op == Op::CREATE_DELETE) {
// bundle these together so random creates/deletes keep about the target number of feeds
if (deterministicRandom()->random01() < 0.5 || self->data.size() == 1) {
wait(self->createNewFeed(cx, self));
} else {
wait(self->destroyFeed(cx, self, feedIdx));
}
} else if (op == Op::READ) {
// relatively small random read
wait(self->doRead(cx, self->data[feedIdx], deterministicRandom()->randomExp(2, 8)));
} else if (op == Op::UPDATE_CLEAR) {
wait(self->doUpdateClear(cx, self, self->data[feedIdx]));
} else if (op == Op::STOP) {
wait(self->stopFeed(cx, self->data[feedIdx]));
} else if (op == Op::POP) {
self->popFeed(cx, self->data[feedIdx]);
} else {
ASSERT(false);
}

wait(waitNextOp);
}
}
};

WorkloadFactory<ChangeFeedOperationsWorkload> ChangeFeedOperationsWorkloadFactory("ChangeFeedOperations");

@ -325,6 +325,7 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
TraceEvent("TestCancelDataMoveEnd").detail("DataMove", dataMove.toString());
}

TraceEvent("TestMoveShardStartMoveKeys").detail("DataMove", dataMoveId);
wait(moveKeys(cx,
dataMoveId,
keys,

@ -215,7 +215,8 @@ struct SkewedReadWriteWorkload : ReadWriteCommon {
self->startReadWriteClients(cx, clients);
wait(timeout(waitForAll(clients), self->testDuration / self->skewRound, Void()));
clients.clear();
wait(delay(5.0) >> updateServerShards(cx, self));
wait(delay(5.0));
wait(updateServerShards(cx, self));
}

return Void();

@ -175,6 +175,10 @@ struct SSCheckpointRestoreWorkload : TestWorkload {
ASSERT(res[i] == kvRange[i]);
}

Future<Void> close = kvStore->onClosed();
kvStore->dispose();
wait(close);

int ignore = wait(setDDMode(cx, 1));
return Void();
}

@ -30,7 +30,6 @@
#include "fdbclient/TenantManagement.actor.h"
#include "fdbclient/TenantSpecialKeys.actor.h"
#include "fdbclient/ThreadSafeTransaction.h"
#include "fdbclient/libb64/decode.h"
#include "fdbrpc/simulator.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/Knobs.h"

@ -38,6 +37,7 @@
#include "flow/IRandom.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/flow.h"
#include "libb64/decode.h"
#include "flow/actorcompiler.h" // This must be the last #include.

struct TenantManagementWorkload : TestWorkload {

@ -519,7 +519,12 @@ void FastAllocator<Size>::getMagazine() {
--g_allocation_tracing_disabled;
}
#endif
block = (void**)::allocate(magazine_size * Size, /*allowLargePages*/ false, /*includeGuardPages*/ true);
#ifdef VALGRIND
const bool includeGuardPages = false;
#else
const bool includeGuardPages = true;
#endif
block = (void**)::allocate(magazine_size * Size, /*allowLargePages*/ false, includeGuardPages);
#endif

// void** block = new void*[ magazine_size * PSize ];

@ -168,6 +168,13 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) {
init( LOW_PRIORITY_DELAY_COUNT, 5 );
init( LOW_PRIORITY_MAX_DELAY, 5.0 );

// HTTP
init( HTTP_READ_SIZE, 128*1024 );
init( HTTP_SEND_SIZE, 32*1024 );
init( HTTP_VERBOSE_LEVEL, 0 );
init( HTTP_REQUEST_ID_HEADER, "" );
init( HTTP_RESPONSE_SKIP_VERIFY_CHECKSUM_FOR_PARTIAL_CONTENT, false );

//IAsyncFile
init( INCREMENTAL_DELETE_TRUNCATE_AMOUNT, 5e8 ); //500MB
init( INCREMENTAL_DELETE_INTERVAL, 1.0 ); //every 1 second

@ -146,8 +146,8 @@ public:
void initMetrics() override;

// INetworkConnections interface
Future<Reference<IConnection>> connect(NetworkAddress toAddr, const std::string& host) override;
Future<Reference<IConnection>> connectExternal(NetworkAddress toAddr, const std::string& host) override;
Future<Reference<IConnection>> connect(NetworkAddress toAddr, tcp::socket* existingSocket = nullptr) override;
Future<Reference<IConnection>> connectExternal(NetworkAddress toAddr) override;
Future<Reference<IUDPSocket>> createUDPSocket(NetworkAddress toAddr) override;
Future<Reference<IUDPSocket>> createUDPSocket(bool isV6) override;
// The mock DNS methods should only be used in simulation.

@ -507,7 +507,7 @@ public:

UID getDebugID() const override { return id; }

tcp::socket& getSocket() { return socket; }
tcp::socket& getSocket() override { return socket; }

private:
UID id;

@ -839,10 +839,15 @@ public:
: id(nondeterministicRandom()->randomUniqueID()), socket(io_service), ssl_sock(socket, context->mutate()),
sslContext(context) {}

explicit SSLConnection(Reference<ReferencedObject<boost::asio::ssl::context>> context, tcp::socket* existingSocket)
: id(nondeterministicRandom()->randomUniqueID()), socket(std::move(*existingSocket)),
ssl_sock(socket, context->mutate()), sslContext(context) {}

// This is not part of the IConnection interface, because it is wrapped by INetwork::connect()
ACTOR static Future<Reference<IConnection>> connect(boost::asio::io_service* ios,
Reference<ReferencedObject<boost::asio::ssl::context>> context,
NetworkAddress addr) {
NetworkAddress addr,
tcp::socket* existingSocket = nullptr) {
std::pair<IPAddress, uint16_t> peerIP = std::make_pair(addr.ip, addr.port);
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
if (iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {

@ -857,9 +862,15 @@ public:
}
}

if (existingSocket != nullptr) {
Reference<SSLConnection> self(new SSLConnection(context, existingSocket));
self->peer_address = addr;
self->init();
return self;
}

state Reference<SSLConnection> self(new SSLConnection(*ios, context));
self->peer_address = addr;

try {
auto to = tcpEndpoint(self->peer_address);
BindPromise p("N2_ConnectError", self->id);

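When an existingSocket is supplied, the connection skips the dial path entirely: the already-connected TCP socket is moved into the SSLConnection (note the std::move in the constructor above) and only the TLS handshake remains to be done. A minimal Boost.Asio sketch of the same adoption pattern, assuming a reasonably recent Boost and an already-connected tcp::socket (hypothetical, outside flow):

#include <boost/asio.hpp>
#include <boost/asio/ssl.hpp>

using boost::asio::ip::tcp;

// Hypothetical sketch: wrap an already-connected TCP socket in a TLS stream.
// Ownership of `sock` moves into the SSL stream, so no new TCP connect
// happens; only the TLS handshake runs.
boost::asio::ssl::stream<tcp::socket> adoptSocket(tcp::socket&& sock,
                                                  boost::asio::ssl::context& ctx) {
	boost::asio::ssl::stream<tcp::socket> sslStream(std::move(sock), ctx);
	sslStream.handshake(boost::asio::ssl::stream_base::client); // blocking client handshake
	return sslStream;
}
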
@ -869,7 +880,7 @@ public:
wait(onConnected);
self->init();
return self;
} catch (Error& e) {
} catch (Error&) {
// Either the connection failed, or was cancelled by the caller
self->closeSocket();
throw;

@ -1097,7 +1108,7 @@ public:

UID getDebugID() const override { return id; }

tcp::socket& getSocket() { return socket; }
tcp::socket& getSocket() override { return socket; }

ssl_socket& getSSLSocket() { return ssl_sock; }

@ -1818,17 +1829,17 @@ THREAD_HANDLE Net2::startThread(THREAD_FUNC_RETURN (*func)(void*), void* arg, in
return ::startThread(func, arg, stackSize, name);
}

Future<Reference<IConnection>> Net2::connect(NetworkAddress toAddr, const std::string& host) {
Future<Reference<IConnection>> Net2::connect(NetworkAddress toAddr, tcp::socket* existingSocket) {
if (toAddr.isTLS()) {
initTLS(ETLSInitState::CONNECT);
return SSLConnection::connect(&this->reactor.ios, this->sslContextVar.get(), toAddr);
return SSLConnection::connect(&this->reactor.ios, this->sslContextVar.get(), toAddr, existingSocket);
}

return Connection::connect(&this->reactor.ios, toAddr);
}

Future<Reference<IConnection>> Net2::connectExternal(NetworkAddress toAddr, const std::string& host) {
return connect(toAddr, host);
Future<Reference<IConnection>> Net2::connectExternal(NetworkAddress toAddr) {
return connect(toAddr);
}

Future<Reference<IUDPSocket>> Net2::createUDPSocket(NetworkAddress toAddr) {

@ -173,6 +173,19 @@ public:
}

bool coinflip() { return (this->random01() < 0.5); }

// Picks a number between 2^minExp and 2^maxExp, but uniformly distributed over exponential buckets 2^n - 2^n+1
// For example, randomExp(0, 4) would have a 25% chance of returning 1, a 25% chance of returning 2-3, a 25% chance
// of returning 4-7, and a 25% chance of returning 8-15
// Similar in Expected Value to doing 1 << randomInt(minExp, maxExp+1), except numbers returned aren't just powers
// of 2
int randomExp(int minExp, int maxExp) {
if (minExp == maxExp) { // N=2, case
return 1 << minExp;
}
int val = 1 << this->randomInt(minExp, maxExp);
return this->randomInt(val, val * 2);
}
};

extern FILE* randLog;

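randomExp spreads its probability mass uniformly across exponential buckets rather than across values, so small and large magnitudes are equally likely; the workloads above use it to randomize window sizes and feed counts over several orders of magnitude. A standalone sketch of the same two-step draw (hypothetical, using <random>; FDB's randomInt is half-open, so the inclusive std::uniform_int_distribution bounds are shifted by one):

#include <random>

// Hypothetical sketch mirroring randomExp: first pick a bucket exponent n
// uniformly, then pick a value uniformly within [2^n, 2^(n+1)).
int randomExpSketch(int minExp, int maxExp, std::mt19937& rng) {
	if (minExp == maxExp)
		return 1 << minExp;
	std::uniform_int_distribution<int> expDist(minExp, maxExp - 1);
	int val = 1 << expDist(rng); // bucket start: 2^n
	std::uniform_int_distribution<int> valDist(val, 2 * val - 1);
	return valDist(rng);         // uniform within the chosen bucket
}
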
@ -235,6 +235,13 @@ public:
int LOW_PRIORITY_DELAY_COUNT;
double LOW_PRIORITY_MAX_DELAY;

// HTTP
int HTTP_READ_SIZE;
int HTTP_SEND_SIZE;
int HTTP_VERBOSE_LEVEL;
std::string HTTP_REQUEST_ID_HEADER;
bool HTTP_RESPONSE_SKIP_VERIFY_CHECKSUM_FOR_PARTIAL_CONTENT; // skip verify md5 checksum for 206 response

// IAsyncFile
int64_t INCREMENTAL_DELETE_TRUNCATE_AMOUNT;
double INCREMENTAL_DELETE_INTERVAL;

@ -175,6 +175,7 @@ public: // introduced features
PROTOCOL_VERSION_FEATURE(0x0FDB00B072000000LL, ShardEncodeLocationMetaData);
PROTOCOL_VERSION_FEATURE(0x0FDB00B072000000LL, Tenants);
PROTOCOL_VERSION_FEATURE(0x0FDB00B072000000LL, Metacluster);
PROTOCOL_VERSION_FEATURE(0x0FDB00B072000000LL, BlobGranuleFile);
};

template <>

@ -1978,22 +1978,25 @@ Future<decltype(std::declval<Fun>()(std::declval<T>()).getValue())> runAfter(Fut
return res;
}

ACTOR template <class T, class U>
Future<U> runAfter(Future<T> lhs, Future<U> rhs) {
T val1 = wait(lhs);
U res = wait(rhs);
return res;
}

template <class T, class Fun>
auto operator>>=(Future<T> lhs, Fun&& rhs) -> Future<decltype(rhs(std::declval<T>()))> {
return runAfter(lhs, std::forward<Fun>(rhs));
}

/*
* NOTE: This implementation doesn't really enforce the ACTOR execution order. See issue #7708
ACTOR template <class T, class U>
Future<U> runAfter(Future<T> lhs, Future<U> rhs) {
T val1 = wait(lhs);
U res = wait(rhs);
return res;
}

template <class T, class U>
Future<U> operator>>(Future<T> const& lhs, Future<U> const& rhs) {
return runAfter(lhs, rhs);
}
*/

/*
* IAsyncListener is similar to AsyncVar, but it decouples the input and output, so the translation unit

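The removal of operator>> here (and the call-site rewrites from wait(a >> b) to two sequential waits, as in the BulkSetup and SkewedReadWrite hunks above) reflects that composing two already-created futures does not sequence their side effects: both computations are already running by the time the combinator sees them. A standalone analogy with eager std::async futures (hypothetical; flow futures differ in detail but share the eager-start property):

#include <future>
#include <iostream>

int main() {
	// Not sequenced: A and B run concurrently, because both tasks start
	// the moment their futures are created.
	auto a = std::async(std::launch::async, [] { std::cout << "A\n"; });
	auto b = std::async(std::launch::async, [] { std::cout << "B\n"; });
	a.get();
	b.get();

	// Sequenced: B is only created (and therefore only started) after A
	// has completed -- the pattern the rewritten call sites use.
	auto a2 = std::async(std::launch::async, [] { std::cout << "A2\n"; });
	a2.get();
	auto b2 = std::async(std::launch::async, [] { std::cout << "B2\n"; });
	b2.get();
	return 0;
}
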
@ -472,6 +472,8 @@ public:
// At present, implemented by Sim2Conn where we want to disable bits flip for connections between parent process and
// child process, also reduce latency for this kind of connection
virtual bool isStableConnection() const { throw unsupported_operation(); }

virtual boost::asio::ip::tcp::socket& getSocket() = 0;
};

class IListener {

@ -688,9 +690,10 @@ public:

// Make an outgoing connection to the given address. May return an error or block indefinitely in case of
// connection problems!
virtual Future<Reference<IConnection>> connect(NetworkAddress toAddr, const std::string& host = "") = 0;
virtual Future<Reference<IConnection>> connect(NetworkAddress toAddr,
boost::asio::ip::tcp::socket* existingSocket = nullptr) = 0;

virtual Future<Reference<IConnection>> connectExternal(NetworkAddress toAddr, const std::string& host = "") = 0;
virtual Future<Reference<IConnection>> connectExternal(NetworkAddress toAddr) = 0;

// Make an outgoing udp connection and connect to the passed address.
virtual Future<Reference<IUDPSocket>> createUDPSocket(NetworkAddress toAddr) = 0;

@ -293,7 +293,7 @@ Future<Reference<IConnection>> INetworkConnections::connect(const std::string& h
std::function<Future<Reference<IConnection>>(NetworkAddress const&)>,
Reference<IConnection>>(
pickEndpoint,
[=](NetworkAddress const& addr) -> Future<Reference<IConnection>> { return connectExternal(addr, host); });
[=](NetworkAddress const& addr) -> Future<Reference<IConnection>> { return connectExternal(addr); });
}

IUDPSocket::~IUDPSocket() {}

@ -130,8 +130,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/BackupToDBCorrectnessClean.toml)
add_fdb_test(TEST_FILES fast/BlobGranuleVerifySmall.toml)
add_fdb_test(TEST_FILES fast/BlobGranuleVerifySmallClean.toml)
add_fdb_test(TEST_FILES fast/BlobGranuleVerifyAtomicOps.toml)
add_fdb_test(TEST_FILES fast/BlobGranuleVerifyCycle.toml)
add_fdb_test(TEST_FILES fast/BlobGranuleMoveVerifyCycle.toml)
add_fdb_test(TEST_FILES fast/CacheTest.toml)
add_fdb_test(TEST_FILES fast/CloggedSideband.toml)
add_fdb_test(TEST_FILES fast/CompressionUtilsUnit.toml)

@ -140,6 +139,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/CycleAndLock.toml)
add_fdb_test(TEST_FILES fast/CycleTest.toml)
add_fdb_test(TEST_FILES fast/ChangeFeeds.toml)
add_fdb_test(TEST_FILES fast/ChangeFeedOperations.toml)
add_fdb_test(TEST_FILES fast/DataLossRecovery.toml)
add_fdb_test(TEST_FILES fast/EncryptionOps.toml)
# TODO: fix failures and re-enable the test

@ -199,6 +199,8 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/PhysicalShardMove.toml IGNORE)
add_fdb_test(TEST_FILES fast/StorageServerCheckpointRestore.toml IGNORE)
endif()
add_fdb_test(TEST_FILES rare/BlobGranuleVerifyAtomicOps.toml)
add_fdb_test(TEST_FILES rare/BlobGranuleVerifyCycle.toml)
add_fdb_test(TEST_FILES rare/CheckRelocation.toml)
add_fdb_test(TEST_FILES rare/ClogUnclog.toml)
add_fdb_test(TEST_FILES rare/CloggedCycleWithKills.toml)

@ -0,0 +1,48 @@
[configuration]
blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4]

[[knobs]]
bg_range_source = "blobRangeKeys"

[[test]]
testTitle = 'BlobGranuleMoveVerifyCycle'

[[test.workload]]
testName = 'Cycle'
transactionsPerSecond = 250.0
testDuration = 60.0
expectedRate = 0

[[test.workload]]
testName = 'RandomMoveKeys'
testDuration = 60.0

[[test.workload]]
testName = 'BlobGranuleVerifier'
testDuration = 60.0

[[test.workload]]
testName = 'RandomClogging'
testDuration = 60.0

[[test.workload]]
testName = 'Rollback'
meanDelay = 60.0
testDuration = 60.0

[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 60.0

[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 60.0

@ -1,9 +1,11 @@
[configuration]
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: exclude redwood because WriteDuringRead can write massive KV pairs and we don't chunk change feed data on disk yet
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [3, 4]
storageEngineExcludeTypes = [3, 4, 5]

[[knobs]]
bg_range_source = "blobRangeKeys"

@ -3,7 +3,7 @@ blobGranulesEnabled = true
allowDefaultTenant = false
# FIXME: exclude redwood because WriteDuringRead can write massive KV pairs and we don't chunk change feed data on disk yet
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [3, 4]
storageEngineExcludeTypes = [3, 4, 5]

[[knobs]]
bg_range_source = "blobRangeKeys"

@ -0,0 +1,10 @@
[configuration]
allowDefaultTenant = false

# TODO add failure events, and then add a version that also supports randomMoveKeys

[[test]]
testTitle = 'ChangeFeedOperationsTest'

[[test.workload]]
testName = 'ChangeFeedOperations'

@ -1,8 +1,10 @@
[configuration]
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4]
storageEngineExcludeTypes = [4, 5]

[[knobs]]
bg_range_source = "blobRangeKeys"

@ -1,8 +1,10 @@
[configuration]
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4]
storageEngineExcludeTypes = [4, 5]

[[knobs]]
bg_range_source = "blobRangeKeys"

@ -1,4 +1,4 @@
storageEngineExcludeTypes=3
storageEngineExcludeTypes=[3, 4, 5]

;Take snap and do cycle test
testTitle=SnapCyclePre

@ -1,3 +1,4 @@
storageEngineExcludeTypes=[4, 5]
buggify=off

testTitle=SnapCycleRestore

@ -1,4 +1,4 @@
storageEngineExcludeTypes=3
storageEngineExcludeTypes=[3, 4, 5]

logAntiQuorum = 0

@ -1,3 +1,5 @@
storageEngineExcludeTypes=[4, 5]

testTitle=RestoreBackup
simBackupAgents=BackupToFile
clearAfterTest=false

@ -1,4 +1,4 @@
storageEngineExcludeTypes=3
storageEngineExcludeTypes=[3, 4, 5]

;write 1000 Keys ending with even numbers
testTitle=SnapTestPre

@ -1,3 +1,5 @@
storageEngineExcludeTypes=[4, 5]

buggify=off

; verify all keys are even numbered

Some files were not shown because too many files have changed in this diff