Merge remote-tracking branch 'upstream/main' into readaware
This commit is contained in:
commit
73624bcd2a
|
@ -301,6 +301,31 @@ endif()
|
|||
@LOG_DIR@
|
||||
)
|
||||
|
||||
add_fdbclient_test(
|
||||
NAME fdb_c_api_tests_with_tls
|
||||
DISABLE_LOG_DUMP
|
||||
TLS_ENABLED
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/run_c_api_tests.py
|
||||
--cluster-file
|
||||
@CLUSTER_FILE@
|
||||
--tester-binary
|
||||
$<TARGET_FILE:fdb_c_api_tester>
|
||||
--external-client-library
|
||||
${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so
|
||||
--test-dir
|
||||
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
|
||||
--tmp-dir
|
||||
@TMP_DIR@
|
||||
--log-dir
|
||||
@LOG_DIR@
|
||||
--tls-cert-file
|
||||
@CLIENT_CERT_FILE@
|
||||
--tls-key-file
|
||||
@CLIENT_KEY_FILE@
|
||||
--tls-ca-file
|
||||
@SERVER_CA_FILE@
|
||||
)
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER)
|
||||
add_test(NAME fdb_c_upgrade_single_threaded_630api
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
|
||||
|
|
|
@ -52,6 +52,9 @@ public:
|
|||
std::vector<std::pair<std::string, std::string>> knobs;
|
||||
TestSpec testSpec;
|
||||
std::string bgBasePath;
|
||||
std::string tlsCertFile;
|
||||
std::string tlsKeyFile;
|
||||
std::string tlsCaFile;
|
||||
};
|
||||
|
||||
} // namespace FdbApiTester
|
||||
|
|
|
@ -54,7 +54,10 @@ enum TesterOptionId {
|
|||
OPT_FDB_API_VERSION,
|
||||
OPT_TRANSACTION_RETRY_LIMIT,
|
||||
OPT_BLOB_GRANULE_LOCAL_FILE_PATH,
|
||||
OPT_STATS_INTERVAL
|
||||
OPT_STATS_INTERVAL,
|
||||
OPT_TLS_CERT_FILE,
|
||||
OPT_TLS_KEY_FILE,
|
||||
OPT_TLS_CA_FILE,
|
||||
};
|
||||
|
||||
CSimpleOpt::SOption TesterOptionDefs[] = //
|
||||
|
@ -79,6 +82,9 @@ CSimpleOpt::SOption TesterOptionDefs[] = //
|
|||
{ OPT_TRANSACTION_RETRY_LIMIT, "--transaction-retry-limit", SO_REQ_SEP },
|
||||
{ OPT_BLOB_GRANULE_LOCAL_FILE_PATH, "--blob-granule-local-file-path", SO_REQ_SEP },
|
||||
{ OPT_STATS_INTERVAL, "--stats-interval", SO_REQ_SEP },
|
||||
{ OPT_TLS_CERT_FILE, "--tls-cert-file", SO_REQ_SEP },
|
||||
{ OPT_TLS_KEY_FILE, "--tls-key-file", SO_REQ_SEP },
|
||||
{ OPT_TLS_CA_FILE, "--tls-ca-file", SO_REQ_SEP },
|
||||
SO_END_OF_OPTIONS };
|
||||
|
||||
void printProgramUsage(const char* execName) {
|
||||
|
@ -122,6 +128,12 @@ void printProgramUsage(const char* execName) {
|
|||
" Test file to run.\n"
|
||||
" --stats-interval MILLISECONDS\n"
|
||||
" Time interval in milliseconds for printing workload statistics (default: 0 - disabled).\n"
|
||||
" --tls-cert-file FILE\n"
|
||||
" Path to file containing client's TLS certificate chain\n"
|
||||
" --tls-key-file FILE\n"
|
||||
" Path to file containing client's TLS private key\n"
|
||||
" --tls-ca-file FILE\n"
|
||||
" Path to file containing TLS CA certificate\n"
|
||||
" -h, --help Display this help and exit.\n",
|
||||
FDB_API_VERSION);
|
||||
}
|
||||
|
@ -221,6 +233,15 @@ bool processArg(TesterOptions& options, const CSimpleOpt& args) {
|
|||
case OPT_STATS_INTERVAL:
|
||||
processIntOption(args.OptionText(), args.OptionArg(), 0, 60000, options.statsIntervalMs);
|
||||
break;
|
||||
case OPT_TLS_CERT_FILE:
|
||||
options.tlsCertFile.assign(args.OptionArg());
|
||||
break;
|
||||
case OPT_TLS_KEY_FILE:
|
||||
options.tlsKeyFile.assign(args.OptionArg());
|
||||
break;
|
||||
case OPT_TLS_CA_FILE:
|
||||
options.tlsCaFile.assign(args.OptionArg());
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -299,6 +320,18 @@ void applyNetworkOptions(TesterOptions& options) {
|
|||
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_KNOB,
|
||||
fmt::format("{}={}", knob.first.c_str(), knob.second.c_str())));
|
||||
}
|
||||
|
||||
if (!options.tlsCertFile.empty()) {
|
||||
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CERT_PATH, options.tlsCertFile));
|
||||
}
|
||||
|
||||
if (!options.tlsKeyFile.empty()) {
|
||||
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_KEY_PATH, options.tlsKeyFile));
|
||||
}
|
||||
|
||||
if (!options.tlsCaFile.empty()) {
|
||||
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CA_PATH, options.tlsCaFile));
|
||||
}
|
||||
}
|
||||
|
||||
void randomizeOptions(TesterOptions& options) {
|
||||
|
|
|
@ -84,6 +84,15 @@ def run_tester(args, test_file):
|
|||
cmd += ["--blob-granule-local-file-path",
|
||||
args.blob_granule_local_file_path]
|
||||
|
||||
if args.tls_ca_file is not None:
|
||||
cmd += ["--tls-ca-file", args.tls_ca_file]
|
||||
|
||||
if args.tls_key_file is not None:
|
||||
cmd += ["--tls-key-file", args.tls_key_file]
|
||||
|
||||
if args.tls_cert_file is not None:
|
||||
cmd += ["--tls-cert-file", args.tls_cert_file]
|
||||
|
||||
get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
|
||||
proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
|
||||
timed_out = False
|
||||
|
@ -149,6 +158,12 @@ def parse_args(argv):
|
|||
help='The directory for storing temporary files (default: None)')
|
||||
parser.add_argument('--blob-granule-local-file-path', type=str, default=None,
|
||||
help='Enable blob granule tests if set, value is path to local blob granule files')
|
||||
parser.add_argument('--tls-ca-file', type=str, default=None,
|
||||
help='Path to client\'s TLS CA file: i.e. certificate of CA that signed the server certificate')
|
||||
parser.add_argument('--tls-cert-file', type=str, default=None,
|
||||
help='Path to client\'s TLS certificate file')
|
||||
parser.add_argument('--tls-key-file', type=str, default=None,
|
||||
help='Path to client\'s TLS private key file')
|
||||
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
|
|
@ -247,7 +247,7 @@ void ResumableStateForRunWorkload::onTransactionSuccess() {
|
|||
const auto commit_latency = watch_commit.diff();
|
||||
const auto tx_duration = watch_tx.diff();
|
||||
stats.addLatency(OP_COMMIT, commit_latency);
|
||||
stats.addLatency(OP_TRANSACTION, commit_latency);
|
||||
stats.addLatency(OP_TRANSACTION, tx_duration);
|
||||
sample_bins[OP_COMMIT].put(commit_latency);
|
||||
sample_bins[OP_TRANSACTION].put(tx_duration);
|
||||
}
|
||||
|
|
|
@ -958,7 +958,7 @@ GetMappedRangeResult getMappedIndexEntries(int beginId,
|
|||
int endId,
|
||||
fdb::Transaction& tr,
|
||||
std::string mapper,
|
||||
int matchIndex = MATCH_INDEX_ALL) {
|
||||
int matchIndex) {
|
||||
std::string indexEntryKeyBegin = indexEntryKey(beginId);
|
||||
std::string indexEntryKeyEnd = indexEntryKey(endId);
|
||||
|
||||
|
@ -980,8 +980,15 @@ GetMappedRangeResult getMappedIndexEntries(int beginId,
|
|||
GetMappedRangeResult getMappedIndexEntries(int beginId,
|
||||
int endId,
|
||||
fdb::Transaction& tr,
|
||||
int matchIndex = MATCH_INDEX_ALL) {
|
||||
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).append("{...}"_sr).pack().toString();
|
||||
int matchIndex,
|
||||
bool allMissing) {
|
||||
std::string mapper = Tuple()
|
||||
.append(prefix)
|
||||
.append(RECORD)
|
||||
.append(allMissing ? "{K[2]}"_sr : "{K[3]}"_sr)
|
||||
.append("{...}"_sr)
|
||||
.pack()
|
||||
.toString();
|
||||
return getMappedIndexEntries(beginId, endId, tr, mapper, matchIndex);
|
||||
}
|
||||
|
||||
|
@ -1003,7 +1010,7 @@ TEST_CASE("fdb_transaction_get_mapped_range") {
|
|||
} else if (r < 0.75) {
|
||||
matchIndex = MATCH_INDEX_UNMATCHED_ONLY;
|
||||
}
|
||||
auto result = getMappedIndexEntries(beginId, endId, tr, matchIndex);
|
||||
auto result = getMappedIndexEntries(beginId, endId, tr, matchIndex, false);
|
||||
|
||||
if (result.err) {
|
||||
fdb::EmptyFuture f1 = tr.on_error(result.err);
|
||||
|
@ -1023,22 +1030,14 @@ TEST_CASE("fdb_transaction_get_mapped_range") {
|
|||
if (matchIndex == MATCH_INDEX_ALL || i == 0 || i == expectSize - 1) {
|
||||
CHECK(indexEntryKey(id).compare(key) == 0);
|
||||
} else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) {
|
||||
// now we cannot generate a workload that only has partial results matched
|
||||
// thus expecting everything matched
|
||||
// TODO: create tests to generate workloads with partial secondary results present
|
||||
CHECK(indexEntryKey(id).compare(key) == 0);
|
||||
} else if (matchIndex == MATCH_INDEX_UNMATCHED_ONLY) {
|
||||
// now we cannot generate a workload that only has partial results matched
|
||||
// thus expecting everything NOT matched(except for the boundary asserted above)
|
||||
// TODO: create tests to generate workloads with partial secondary results present
|
||||
CHECK(EMPTY.compare(key) == 0);
|
||||
} else {
|
||||
CHECK(EMPTY.compare(key) == 0);
|
||||
}
|
||||
|
||||
// TODO: create tests to generate workloads with partial secondary results present
|
||||
CHECK(boundaryAndExist == boundary);
|
||||
|
||||
bool empty = range_results.empty();
|
||||
CHECK(boundaryAndExist == (boundary && !empty));
|
||||
CHECK(EMPTY.compare(value) == 0);
|
||||
CHECK(range_results.size() == SPLIT_SIZE);
|
||||
for (int split = 0; split < SPLIT_SIZE; split++) {
|
||||
|
@ -1051,6 +1050,58 @@ TEST_CASE("fdb_transaction_get_mapped_range") {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_CASE("fdb_transaction_get_mapped_range_missing_all_secondary") {
|
||||
const int TOTAL_RECORDS = 20;
|
||||
fillInRecords(TOTAL_RECORDS);
|
||||
|
||||
fdb::Transaction tr(db);
|
||||
// RYW should be enabled.
|
||||
while (1) {
|
||||
int beginId = 1;
|
||||
int endId = 19;
|
||||
const double r = deterministicRandom()->random01();
|
||||
int matchIndex = MATCH_INDEX_ALL;
|
||||
if (r < 0.25) {
|
||||
matchIndex = MATCH_INDEX_NONE;
|
||||
} else if (r < 0.5) {
|
||||
matchIndex = MATCH_INDEX_MATCHED_ONLY;
|
||||
} else if (r < 0.75) {
|
||||
matchIndex = MATCH_INDEX_UNMATCHED_ONLY;
|
||||
}
|
||||
auto result = getMappedIndexEntries(beginId, endId, tr, matchIndex, true);
|
||||
|
||||
if (result.err) {
|
||||
fdb::EmptyFuture f1 = tr.on_error(result.err);
|
||||
fdb_check(wait_future(f1));
|
||||
continue;
|
||||
}
|
||||
|
||||
int expectSize = endId - beginId;
|
||||
CHECK(result.mkvs.size() == expectSize);
|
||||
CHECK(!result.more);
|
||||
|
||||
int id = beginId;
|
||||
bool boundary;
|
||||
for (int i = 0; i < expectSize; i++, id++) {
|
||||
boundary = i == 0 || i == expectSize - 1;
|
||||
const auto& [key, value, begin, end, range_results, boundaryAndExist] = result.mkvs[i];
|
||||
if (matchIndex == MATCH_INDEX_ALL || i == 0 || i == expectSize - 1) {
|
||||
CHECK(indexEntryKey(id).compare(key) == 0);
|
||||
} else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) {
|
||||
CHECK(EMPTY.compare(key) == 0);
|
||||
} else if (matchIndex == MATCH_INDEX_UNMATCHED_ONLY) {
|
||||
CHECK(indexEntryKey(id).compare(key) == 0);
|
||||
} else {
|
||||
CHECK(EMPTY.compare(key) == 0);
|
||||
}
|
||||
bool empty = range_results.empty();
|
||||
CHECK(boundaryAndExist == (boundary && !empty));
|
||||
CHECK(EMPTY.compare(value) == 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("fdb_transaction_get_mapped_range_restricted_to_serializable") {
|
||||
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString();
|
||||
fdb::Transaction tr(db);
|
||||
|
@ -1111,7 +1162,7 @@ TEST_CASE("fdb_transaction_get_mapped_range_fail_on_mapper_not_tuple") {
|
|||
};
|
||||
assertNotTuple(mapper);
|
||||
fdb::Transaction tr(db);
|
||||
auto result = getMappedIndexEntries(1, 3, tr, mapper);
|
||||
auto result = getMappedIndexEntries(1, 3, tr, mapper, MATCH_INDEX_ALL);
|
||||
ASSERT(result.err == error_code_mapper_not_tuple);
|
||||
}
|
||||
|
||||
|
|
|
@ -533,10 +533,14 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureMappedResults_Future
|
|||
FDBMappedKeyValue kvm = kvms[i];
|
||||
int kvm_count = kvm.getRange.m_size;
|
||||
|
||||
const int totalLengths = 4 + kvm_count * 2;
|
||||
// now it has 5 fields: key, value, getRange.begin, getRange.end, boundaryAndExist
|
||||
// this needs to change if FDBMappedKeyValue definition is changed.
|
||||
const int totalFieldFDBMappedKeyValue = 5;
|
||||
|
||||
const int totalLengths = totalFieldFDBMappedKeyValue + kvm_count * 2;
|
||||
|
||||
int totalBytes = kvm.key.key_length + kvm.value.key_length + kvm.getRange.begin.key.key_length +
|
||||
kvm.getRange.end.key.key_length;
|
||||
kvm.getRange.end.key.key_length + sizeof(kvm.boundaryAndExist);
|
||||
for (int i = 0; i < kvm_count; i++) {
|
||||
auto kv = kvm.getRange.data[i];
|
||||
totalBytes += kv.key_length + kv.value_length;
|
||||
|
@ -580,6 +584,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureMappedResults_Future
|
|||
cpBytesAndLength(pByte, pLength, kvm.value);
|
||||
cpBytesAndLength(pByte, pLength, kvm.getRange.begin.key);
|
||||
cpBytesAndLength(pByte, pLength, kvm.getRange.end.key);
|
||||
cpBytesAndLengthInner(pByte, pLength, (uint8_t*)&(kvm.boundaryAndExist), sizeof(kvm.boundaryAndExist));
|
||||
for (int kvm_i = 0; kvm_i < kvm_count; kvm_i++) {
|
||||
auto kv = kvm.getRange.data[kvm_i];
|
||||
cpBytesAndLengthInner(pByte, pLength, kv.key, kv.key_length);
|
||||
|
@ -588,6 +593,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureMappedResults_Future
|
|||
}
|
||||
}
|
||||
// After native arrays are released
|
||||
// call public static method MappedKeyValue::fromBytes()
|
||||
jobject mkv = jenv->CallStaticObjectMethod(
|
||||
mapped_key_value_class, mapped_key_value_from_bytes, (jbyteArray)bytesArray, (jintArray)lengthArray);
|
||||
if (jenv->ExceptionOccurred())
|
||||
|
|
|
@ -208,7 +208,11 @@ class MappedRangeQueryIntegrationTest {
|
|||
assertByteArrayEquals(indexEntryKey(id), mappedKeyValue.getKey());
|
||||
assertByteArrayEquals(EMPTY, mappedKeyValue.getValue());
|
||||
assertByteArrayEquals(indexEntryKey(id), mappedKeyValue.getKey());
|
||||
|
||||
if (id == begin || id == end - 1) {
|
||||
Assertions.assertTrue(mappedKeyValue.getBoundaryAndExist());
|
||||
} else {
|
||||
Assertions.assertFalse(mappedKeyValue.getBoundaryAndExist());
|
||||
}
|
||||
byte[] prefix = recordKeyPrefix(id);
|
||||
assertByteArrayEquals(prefix, mappedKeyValue.getRangeBegin());
|
||||
prefix[prefix.length - 1] = (byte)0x01;
|
||||
|
|
|
@ -358,7 +358,8 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
|
|||
if (mapper == null) {
|
||||
throw new IllegalArgumentException("Mapper must be non-null");
|
||||
}
|
||||
return new MappedRangeQuery(FDBTransaction.this, false, begin, end, mapper, limit, reverse, mode, eventKeeper);
|
||||
return new MappedRangeQuery(FDBTransaction.this, false, begin, end, mapper, limit, matchIndex, reverse, mode,
|
||||
eventKeeper);
|
||||
}
|
||||
|
||||
///////////////////
|
||||
|
@ -463,7 +464,8 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
|
|||
protected FutureMappedResults getMappedRange_internal(KeySelector begin, KeySelector end,
|
||||
byte[] mapper, // Nullable
|
||||
int rowLimit, int targetBytes, int streamingMode,
|
||||
int iteration, boolean isSnapshot, boolean reverse) {
|
||||
int iteration, boolean isSnapshot, boolean reverse,
|
||||
int matchIndex) {
|
||||
if (eventKeeper != null) {
|
||||
eventKeeper.increment(Events.JNI_CALL);
|
||||
}
|
||||
|
@ -476,7 +478,7 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
|
|||
return new FutureMappedResults(
|
||||
Transaction_getMappedRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), end.getKey(),
|
||||
end.orEqual(), end.getOffset(), mapper, rowLimit, targetBytes, streamingMode,
|
||||
iteration, MATCH_INDEX_ALL, isSnapshot, reverse),
|
||||
iteration, matchIndex, isSnapshot, reverse),
|
||||
FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper);
|
||||
} finally {
|
||||
pointerReadLock.unlock();
|
||||
|
|
|
@ -22,6 +22,8 @@ package com.apple.foundationdb;
|
|||
|
||||
import com.apple.foundationdb.tuple.ByteArrayUtil;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -31,24 +33,35 @@ public class MappedKeyValue extends KeyValue {
|
|||
private final byte[] rangeBegin;
|
||||
private final byte[] rangeEnd;
|
||||
private final List<KeyValue> rangeResult;
|
||||
private final int boundaryAndExist;
|
||||
|
||||
public MappedKeyValue(byte[] key, byte[] value, byte[] rangeBegin, byte[] rangeEnd, List<KeyValue> rangeResult) {
|
||||
// now it has 5 fields: key, value, getRange.begin, getRange.end, boundaryAndExist
|
||||
// this needs to change if FDBMappedKeyValue definition is changed.
|
||||
private static final int TOTAL_SERIALIZED_FIELD_FDBMappedKeyValue = 5;
|
||||
|
||||
public MappedKeyValue(byte[] key, byte[] value, byte[] rangeBegin, byte[] rangeEnd, List<KeyValue> rangeResult,
|
||||
int boundaryAndExist) {
|
||||
super(key, value);
|
||||
this.rangeBegin = rangeBegin;
|
||||
this.rangeEnd = rangeEnd;
|
||||
this.rangeResult = rangeResult;
|
||||
this.boundaryAndExist = boundaryAndExist;
|
||||
}
|
||||
|
||||
public byte[] getRangeBegin() { return rangeBegin; }
|
||||
|
||||
public byte[] getRangeEnd() { return rangeEnd; }
|
||||
|
||||
public boolean getBoundaryAndExist() { return boundaryAndExist == 0 ? false : true; }
|
||||
|
||||
public List<KeyValue> getRangeResult() { return rangeResult; }
|
||||
|
||||
public static MappedKeyValue fromBytes(byte[] bytes, int[] lengths) {
|
||||
// Lengths include: key, value, rangeBegin, rangeEnd, boundaryAndExist, count * (underlying key, underlying value)
|
||||
if (lengths.length < 4) {
|
||||
throw new IllegalArgumentException("There needs to be at least 4 lengths to cover the metadata");
|
||||
if (lengths.length < TOTAL_SERIALIZED_FIELD_FDBMappedKeyValue) {
|
||||
throw new IllegalArgumentException("There needs to be at least " +
|
||||
TOTAL_SERIALIZED_FIELD_FDBMappedKeyValue +
|
||||
" lengths to cover the metadata");
|
||||
}
|
||||
|
||||
Offset offset = new Offset();
|
||||
|
@ -56,18 +69,20 @@ public class MappedKeyValue extends KeyValue {
|
|||
byte[] value = takeBytes(offset, bytes, lengths);
|
||||
byte[] rangeBegin = takeBytes(offset, bytes, lengths);
|
||||
byte[] rangeEnd = takeBytes(offset, bytes, lengths);
|
||||
byte[] boundaryAndExistBytes = takeBytes(offset, bytes, lengths);
|
||||
int boundaryAndExist = ByteBuffer.wrap(boundaryAndExistBytes).order(ByteOrder.LITTLE_ENDIAN).getInt();
|
||||
|
||||
if ((lengths.length - 4) % 2 != 0) {
|
||||
if ((lengths.length - TOTAL_SERIALIZED_FIELD_FDBMappedKeyValue) % 2 != 0) {
|
||||
throw new IllegalArgumentException("There needs to be an even number of lengths!");
|
||||
}
|
||||
int count = (lengths.length - 4) / 2;
|
||||
int count = (lengths.length - TOTAL_SERIALIZED_FIELD_FDBMappedKeyValue) / 2;
|
||||
List<KeyValue> rangeResult = new ArrayList<>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
byte[] k = takeBytes(offset, bytes, lengths);
|
||||
byte[] v = takeBytes(offset, bytes, lengths);
|
||||
rangeResult.add(new KeyValue(k, v));
|
||||
}
|
||||
return new MappedKeyValue(key, value, rangeBegin, rangeEnd, rangeResult);
|
||||
return new MappedKeyValue(key, value, rangeBegin, rangeEnd, rangeResult, boundaryAndExist);
|
||||
}
|
||||
|
||||
static class Offset {
|
||||
|
@ -96,13 +111,15 @@ public class MappedKeyValue extends KeyValue {
|
|||
MappedKeyValue rhs = (MappedKeyValue) obj;
|
||||
return Arrays.equals(rangeBegin, rhs.rangeBegin)
|
||||
&& Arrays.equals(rangeEnd, rhs.rangeEnd)
|
||||
&& Objects.equals(rangeResult, rhs.rangeResult);
|
||||
&& Objects.equals(rangeResult, rhs.rangeResult)
|
||||
&& boundaryAndExist == rhs.boundaryAndExist;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int hashForResult = rangeResult == null ? 0 : rangeResult.hashCode();
|
||||
return 17 + (29 * hashForResult + 37 * Arrays.hashCode(rangeBegin) + Arrays.hashCode(rangeEnd));
|
||||
return 17 +
|
||||
(29 * hashForResult + boundaryAndExist + 37 * Arrays.hashCode(rangeBegin) + Arrays.hashCode(rangeEnd));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -111,6 +128,7 @@ public class MappedKeyValue extends KeyValue {
|
|||
sb.append("rangeBegin=").append(ByteArrayUtil.printable(rangeBegin));
|
||||
sb.append(", rangeEnd=").append(ByteArrayUtil.printable(rangeEnd));
|
||||
sb.append(", rangeResult=").append(rangeResult);
|
||||
sb.append(", boundaryAndExist=").append(boundaryAndExist);
|
||||
sb.append('}');
|
||||
return super.toString() + "->" + sb.toString();
|
||||
}
|
||||
|
|
|
@ -53,18 +53,21 @@ class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
|
|||
private final byte[] mapper; // Nonnull
|
||||
private final boolean snapshot;
|
||||
private final int rowLimit;
|
||||
private final int matchIndex;
|
||||
private final boolean reverse;
|
||||
private final StreamingMode streamingMode;
|
||||
private final EventKeeper eventKeeper;
|
||||
|
||||
MappedRangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] mapper,
|
||||
int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
|
||||
int rowLimit, int matchIndex, boolean reverse, StreamingMode streamingMode,
|
||||
EventKeeper eventKeeper) {
|
||||
this.tr = transaction;
|
||||
this.begin = begin;
|
||||
this.end = end;
|
||||
this.mapper = mapper;
|
||||
this.snapshot = isSnapshot;
|
||||
this.rowLimit = rowLimit;
|
||||
this.matchIndex = matchIndex;
|
||||
this.reverse = reverse;
|
||||
this.streamingMode = streamingMode;
|
||||
this.eventKeeper = eventKeeper;
|
||||
|
@ -88,14 +91,14 @@ class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
|
|||
|
||||
FutureMappedResults range =
|
||||
tr.getMappedRange_internal(this.begin, this.end, this.mapper, this.rowLimit, 0,
|
||||
StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse);
|
||||
StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse, this.matchIndex);
|
||||
return range.thenApply(result -> result.get().values).whenComplete((result, e) -> range.close());
|
||||
}
|
||||
|
||||
// If the streaming mode is not EXACT, simply collect the results of an
|
||||
// iteration into a list
|
||||
return AsyncUtil.collect(
|
||||
new MappedRangeQuery(tr, snapshot, begin, end, mapper, rowLimit, reverse, mode, eventKeeper),
|
||||
new MappedRangeQuery(tr, snapshot, begin, end, mapper, rowLimit, matchIndex, reverse, mode, eventKeeper),
|
||||
tr.getExecutor());
|
||||
}
|
||||
|
||||
|
@ -106,7 +109,7 @@ class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
|
|||
*/
|
||||
@Override
|
||||
public AsyncRangeIterator iterator() {
|
||||
return new AsyncRangeIterator(this.rowLimit, this.reverse, this.streamingMode);
|
||||
return new AsyncRangeIterator(this.rowLimit, this.matchIndex, this.reverse, this.streamingMode);
|
||||
}
|
||||
|
||||
private class AsyncRangeIterator implements AsyncIterator<MappedKeyValue> {
|
||||
|
@ -114,6 +117,7 @@ class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
|
|||
private final boolean rowsLimited;
|
||||
private final boolean reverse;
|
||||
private final StreamingMode streamingMode;
|
||||
private final int matchIndex;
|
||||
|
||||
// There is the chance for parallelism in the two "chunks" for fetched data
|
||||
private MappedRangeResult chunk = null;
|
||||
|
@ -131,12 +135,13 @@ class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
|
|||
private CompletableFuture<Boolean> nextFuture;
|
||||
private boolean isCancelled = false;
|
||||
|
||||
private AsyncRangeIterator(int rowLimit, boolean reverse, StreamingMode streamingMode) {
|
||||
private AsyncRangeIterator(int rowLimit, int matchIndex, boolean reverse, StreamingMode streamingMode) {
|
||||
this.begin = MappedRangeQuery.this.begin;
|
||||
this.end = MappedRangeQuery.this.end;
|
||||
this.rowsLimited = rowLimit != 0;
|
||||
this.rowsRemaining = rowLimit;
|
||||
this.reverse = reverse;
|
||||
this.matchIndex = matchIndex;
|
||||
this.streamingMode = streamingMode;
|
||||
|
||||
startNextFetch();
|
||||
|
@ -217,8 +222,9 @@ class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
|
|||
|
||||
nextFuture = new CompletableFuture<>();
|
||||
final long sTime = System.nanoTime();
|
||||
fetchingChunk = tr.getMappedRange_internal(begin, end, mapper, rowsLimited ? rowsRemaining : 0, 0,
|
||||
streamingMode.code(), ++iteration, snapshot, reverse);
|
||||
fetchingChunk =
|
||||
tr.getMappedRange_internal(begin, end, mapper, rowsLimited ? rowsRemaining : 0, 0, streamingMode.code(),
|
||||
++iteration, snapshot, reverse, matchIndex);
|
||||
|
||||
BiConsumer<MappedRangeResultInfo, Throwable> cons = new FetchComplete(fetchingChunk, nextFuture);
|
||||
if (eventKeeper != null) {
|
||||
|
|
|
@ -51,6 +51,8 @@ class MappedRangeResultDirectBufferIterator extends DirectBufferIterator impleme
|
|||
final byte[] value = getString();
|
||||
final byte[] rangeBegin = getString();
|
||||
final byte[] rangeEnd = getString();
|
||||
final byte[] boundaryAndExistBytes = getString();
|
||||
final int boundaryAndExist = ByteBuffer.wrap(boundaryAndExistBytes).getInt();
|
||||
final int rangeResultSize = byteBuffer.getInt();
|
||||
List<KeyValue> rangeResult = new ArrayList();
|
||||
for (int i = 0; i < rangeResultSize; i++) {
|
||||
|
@ -59,7 +61,7 @@ class MappedRangeResultDirectBufferIterator extends DirectBufferIterator impleme
|
|||
rangeResult.add(new KeyValue(k, v));
|
||||
}
|
||||
current += 1;
|
||||
return new MappedKeyValue(key, value, rangeBegin, rangeEnd, rangeResult);
|
||||
return new MappedKeyValue(key, value, rangeBegin, rangeEnd, rangeResult, boundaryAndExist);
|
||||
}
|
||||
|
||||
private byte[] getString() {
|
||||
|
|
|
@ -215,6 +215,26 @@ def kill(logger):
|
|||
assert new_generation > old_generation
|
||||
|
||||
|
||||
@enable_logging()
|
||||
def killall(logger):
|
||||
# test is designed to make sure 'kill all' sends all requests simultaneously
|
||||
old_generation = get_value_from_status_json(False, 'cluster', 'generation')
|
||||
# This is currently an issue with fdbcli,
|
||||
# where you need to first run 'kill' to initialize the list of processes
|
||||
# and then specify the certain process to kill
|
||||
process = subprocess.Popen(command_template[:-1], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=fdbcli_env)
|
||||
output, error = process.communicate(input='kill; kill all; sleep 1\n'.encode())
|
||||
logger.debug(output)
|
||||
# wait for a second for the cluster recovery
|
||||
time.sleep(1)
|
||||
new_generation = get_value_from_status_json(True, 'cluster', 'generation')
|
||||
logger.debug("Old generation: {}, New generation: {}".format(old_generation, new_generation))
|
||||
# Make sure the kill is not happening sequentially
|
||||
# Precondition: each recovery increases the generation number by 2
|
||||
# Relax the condition to allow for one additional recovery that happened while we fetched the old generation
|
||||
assert new_generation <= (old_generation + 4)
|
||||
|
||||
|
||||
@enable_logging()
|
||||
def suspend(logger):
|
||||
if not shutil.which("pidof"):
|
||||
|
@ -582,6 +602,7 @@ def triggerddteaminfolog(logger):
|
|||
output = run_fdbcli_command('triggerddteaminfolog')
|
||||
assert output == 'Triggered team info logging in data distribution.'
|
||||
|
||||
|
||||
@enable_logging()
|
||||
def tenants(logger):
|
||||
output = run_fdbcli_command('listtenants')
|
||||
|
@ -610,7 +631,7 @@ def tenants(logger):
|
|||
assert len(lines) == 2
|
||||
assert lines[0].strip().startswith('id: ')
|
||||
assert lines[1].strip().startswith('prefix: ')
|
||||
|
||||
|
||||
output = run_fdbcli_command('usetenant')
|
||||
assert output == 'Using the default tenant'
|
||||
|
||||
|
@ -652,7 +673,8 @@ def tenants(logger):
|
|||
assert lines[3] == '`tenant_test\' is `default_tenant\''
|
||||
|
||||
process = subprocess.Popen(command_template[:-1], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=fdbcli_env)
|
||||
cmd_sequence = ['writemode on', 'usetenant tenant', 'clear tenant_test', 'deletetenant tenant', 'get tenant_test', 'defaulttenant', 'usetenant tenant']
|
||||
cmd_sequence = ['writemode on', 'usetenant tenant', 'clear tenant_test',
|
||||
'deletetenant tenant', 'get tenant_test', 'defaulttenant', 'usetenant tenant']
|
||||
output, error_output = process.communicate(input='\n'.join(cmd_sequence).encode())
|
||||
|
||||
lines = output.decode().strip().split('\n')[-7:]
|
||||
|
@ -680,6 +702,7 @@ def tenants(logger):
|
|||
|
||||
run_fdbcli_command('writemode on; clear tenant_test')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
|
||||
description="""
|
||||
|
@ -731,5 +754,6 @@ if __name__ == '__main__':
|
|||
assert args.process_number > 1, "Process number should be positive"
|
||||
coordinators()
|
||||
exclude()
|
||||
killall()
|
||||
# TODO: fix the failure where one process is not available after setclass call
|
||||
#setclass()
|
||||
# setclass()
|
||||
|
|
|
@ -404,8 +404,7 @@ endfunction()
|
|||
|
||||
# Creates a single cluster before running the specified command (usually a ctest test)
|
||||
function(add_fdbclient_test)
|
||||
set(options DISABLED ENABLED DISABLE_LOG_DUMP)
|
||||
set(options DISABLED ENABLED API_TEST_BLOB_GRANULES_ENABLED)
|
||||
set(options DISABLED ENABLED DISABLE_LOG_DUMP API_TEST_BLOB_GRANULES_ENABLED TLS_ENABLED)
|
||||
set(oneValueArgs NAME PROCESS_NUMBER TEST_TIMEOUT WORKING_DIRECTORY)
|
||||
set(multiValueArgs COMMAND)
|
||||
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
|
||||
|
@ -435,6 +434,9 @@ function(add_fdbclient_test)
|
|||
if(T_API_TEST_BLOB_GRANULES_ENABLED)
|
||||
list(APPEND TMP_CLUSTER_CMD --blob-granules-enabled)
|
||||
endif()
|
||||
if(T_TLS_ENABLED)
|
||||
list(APPEND TMP_CLUSTER_CMD --tls-enabled)
|
||||
endif()
|
||||
message(STATUS "Adding Client test ${T_NAME}")
|
||||
add_test(NAME "${T_NAME}"
|
||||
WORKING_DIRECTORY ${T_WORKING_DIRECTORY}
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "boost/algorithm/string.hpp"
|
||||
|
||||
#include "fdbcli/fdbcli.actor.h"
|
||||
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
|
@ -40,8 +42,10 @@ ACTOR Future<bool> expensiveDataCheckCommandActor(
|
|||
std::vector<StringRef> tokens,
|
||||
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface) {
|
||||
state bool result = true;
|
||||
state std::string addressesStr;
|
||||
if (tokens.size() == 1) {
|
||||
// initialize worker interfaces
|
||||
address_interface->clear();
|
||||
wait(getWorkerInterfaces(tr, address_interface));
|
||||
}
|
||||
if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
|
||||
|
@ -57,20 +61,26 @@ ACTOR Future<bool> expensiveDataCheckCommandActor(
|
|||
}
|
||||
printf("\n");
|
||||
} else if (tokencmp(tokens[1], "all")) {
|
||||
state std::map<Key, std::pair<Value, ClientLeaderRegInterface>>::const_iterator it;
|
||||
for (it = address_interface->cbegin(); it != address_interface->cend(); it++) {
|
||||
int64_t checkRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(it->first, true, 0)));
|
||||
if (!checkRequestSent) {
|
||||
result = false;
|
||||
fprintf(stderr, "ERROR: failed to send request to check process `%s'.\n", it->first.toString().c_str());
|
||||
}
|
||||
}
|
||||
if (address_interface->size() == 0) {
|
||||
fprintf(stderr,
|
||||
"ERROR: no processes to check. You must run the `expensive_data_check’ "
|
||||
"command before running `expensive_data_check all’.\n");
|
||||
} else {
|
||||
printf("Attempted to kill and check %zu processes\n", address_interface->size());
|
||||
std::vector<std::string> addressesVec;
|
||||
for (const auto& [address, _] : *address_interface) {
|
||||
addressesVec.push_back(address.toString());
|
||||
}
|
||||
addressesStr = boost::algorithm::join(addressesVec, ",");
|
||||
// make sure we only call the interface once to send requests in parallel
|
||||
int64_t checkRequestsSent = wait(safeThreadFutureToFuture(db->rebootWorker(addressesStr, true, 0)));
|
||||
if (!checkRequestsSent) {
|
||||
result = false;
|
||||
fprintf(stderr,
|
||||
"ERROR: failed to send requests to check all processes, please run the `expensive_data_check’ "
|
||||
"command again to fetch latest addresses.\n");
|
||||
} else {
|
||||
printf("Attempted to kill and check %zu processes\n", address_interface->size());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
state int i;
|
||||
|
@ -83,15 +93,21 @@ ACTOR Future<bool> expensiveDataCheckCommandActor(
|
|||
}
|
||||
|
||||
if (result) {
|
||||
std::vector<std::string> addressesVec;
|
||||
for (i = 1; i < tokens.size(); i++) {
|
||||
int64_t checkRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(tokens[i], true, 0)));
|
||||
if (!checkRequestSent) {
|
||||
result = false;
|
||||
fprintf(
|
||||
stderr, "ERROR: failed to send request to check process `%s'.\n", tokens[i].toString().c_str());
|
||||
}
|
||||
addressesVec.push_back(tokens[i].toString());
|
||||
}
|
||||
addressesStr = boost::algorithm::join(addressesVec, ",");
|
||||
int64_t checkRequestsSent = wait(safeThreadFutureToFuture(db->rebootWorker(addressesStr, true, 0)));
|
||||
if (!checkRequestsSent) {
|
||||
result = false;
|
||||
fprintf(stderr,
|
||||
"ERROR: failed to send requests to check processes `%s', please run the `expensive_data_check’ "
|
||||
"command again to fetch latest addresses.\n",
|
||||
addressesStr.c_str());
|
||||
} else {
|
||||
printf("Attempted to kill and check %zu processes\n", tokens.size() - 1);
|
||||
}
|
||||
printf("Attempted to kill and check %zu processes\n", tokens.size() - 1);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "boost/algorithm/string.hpp"
|
||||
|
||||
#include "fdbcli/fdbcli.actor.h"
|
||||
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
|
@ -37,8 +39,10 @@ ACTOR Future<bool> killCommandActor(Reference<IDatabase> db,
|
|||
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface) {
|
||||
ASSERT(tokens.size() >= 1);
|
||||
state bool result = true;
|
||||
state std::string addressesStr;
|
||||
if (tokens.size() == 1) {
|
||||
// initialize worker interfaces
|
||||
address_interface->clear();
|
||||
wait(getWorkerInterfaces(tr, address_interface));
|
||||
}
|
||||
if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
|
||||
|
@ -54,21 +58,27 @@ ACTOR Future<bool> killCommandActor(Reference<IDatabase> db,
|
|||
}
|
||||
printf("\n");
|
||||
} else if (tokencmp(tokens[1], "all")) {
|
||||
state std::map<Key, std::pair<Value, ClientLeaderRegInterface>>::const_iterator it;
|
||||
for (it = address_interface->cbegin(); it != address_interface->cend(); it++) {
|
||||
int64_t killRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(it->first, false, 0)));
|
||||
if (!killRequestSent) {
|
||||
result = false;
|
||||
fprintf(stderr, "ERROR: failed to send request to kill process `%s'.\n", it->first.toString().c_str());
|
||||
}
|
||||
}
|
||||
if (address_interface->size() == 0) {
|
||||
result = false;
|
||||
fprintf(stderr,
|
||||
"ERROR: no processes to kill. You must run the `kill’ command before "
|
||||
"running `kill all’.\n");
|
||||
} else {
|
||||
printf("Attempted to kill %zu processes\n", address_interface->size());
|
||||
std::vector<std::string> addressesVec;
|
||||
for (const auto& [address, _] : *address_interface) {
|
||||
addressesVec.push_back(address.toString());
|
||||
}
|
||||
addressesStr = boost::algorithm::join(addressesVec, ",");
|
||||
// make sure we only call the interface once to send requests in parallel
|
||||
int64_t killRequestsSent = wait(safeThreadFutureToFuture(db->rebootWorker(addressesStr, false, 0)));
|
||||
if (!killRequestsSent) {
|
||||
result = false;
|
||||
fprintf(stderr,
|
||||
"ERROR: failed to send requests to all processes, please run the `kill’ command again to fetch "
|
||||
"latest addresses.\n");
|
||||
} else {
|
||||
printf("Attempted to kill %zu processes\n", address_interface->size());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
state int i;
|
||||
|
@ -81,15 +91,21 @@ ACTOR Future<bool> killCommandActor(Reference<IDatabase> db,
|
|||
}
|
||||
|
||||
if (result) {
|
||||
std::vector<std::string> addressesVec;
|
||||
for (i = 1; i < tokens.size(); i++) {
|
||||
int64_t killRequestSent = wait(safeThreadFutureToFuture(db->rebootWorker(tokens[i], false, 0)));
|
||||
if (!killRequestSent) {
|
||||
result = false;
|
||||
fprintf(
|
||||
stderr, "ERROR: failed to send request to kill process `%s'.\n", tokens[i].toString().c_str());
|
||||
}
|
||||
addressesVec.push_back(tokens[i].toString());
|
||||
}
|
||||
addressesStr = boost::algorithm::join(addressesVec, ",");
|
||||
int64_t killRequestsSent = wait(safeThreadFutureToFuture(db->rebootWorker(addressesStr, false, 0)));
|
||||
if (!killRequestsSent) {
|
||||
result = false;
|
||||
fprintf(stderr,
|
||||
"ERROR: failed to send requests to kill processes `%s', please run the `kill’ command again to "
|
||||
"fetch latest addresses.\n",
|
||||
addressesStr.c_str());
|
||||
} else {
|
||||
printf("Attempted to kill %zu processes\n", tokens.size() - 1);
|
||||
}
|
||||
printf("Attempted to kill %zu processes\n", tokens.size() - 1);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "boost/algorithm/string.hpp"
|
||||
|
||||
#include "fdbcli/fdbcli.actor.h"
|
||||
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
|
@ -37,8 +39,10 @@ ACTOR Future<bool> suspendCommandActor(Reference<IDatabase> db,
|
|||
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface) {
|
||||
ASSERT(tokens.size() >= 1);
|
||||
state bool result = true;
|
||||
state std::string addressesStr;
|
||||
if (tokens.size() == 1) {
|
||||
// initialize worker interfaces
|
||||
address_interface->clear();
|
||||
wait(getWorkerInterfaces(tr, address_interface));
|
||||
if (address_interface->size() == 0) {
|
||||
printf("\nNo addresses can be suspended.\n");
|
||||
|
@ -72,19 +76,23 @@ ACTOR Future<bool> suspendCommandActor(Reference<IDatabase> db,
|
|||
printUsage(tokens[0]);
|
||||
result = false;
|
||||
} else {
|
||||
int64_t timeout_ms = seconds * 1000;
|
||||
tr->setOption(FDBTransactionOptions::TIMEOUT, StringRef((uint8_t*)&timeout_ms, sizeof(int64_t)));
|
||||
std::vector<std::string> addressesVec;
|
||||
for (i = 2; i < tokens.size(); i++) {
|
||||
int64_t suspendRequestSent =
|
||||
wait(safeThreadFutureToFuture(db->rebootWorker(tokens[i], false, static_cast<int>(seconds))));
|
||||
if (!suspendRequestSent) {
|
||||
result = false;
|
||||
fprintf(stderr,
|
||||
"ERROR: failed to send request to suspend process `%s'.\n",
|
||||
tokens[i].toString().c_str());
|
||||
}
|
||||
addressesVec.push_back(tokens[i].toString());
|
||||
}
|
||||
addressesStr = boost::algorithm::join(addressesVec, ",");
|
||||
int64_t suspendRequestSent =
|
||||
wait(safeThreadFutureToFuture(db->rebootWorker(addressesStr, false, static_cast<int>(seconds))));
|
||||
if (!suspendRequestSent) {
|
||||
result = false;
|
||||
fprintf(
|
||||
stderr,
|
||||
"ERROR: failed to send requests to suspend processes `%s', please run the `suspend’ command "
|
||||
"to fetch latest addresses.\n",
|
||||
addressesStr.c_str());
|
||||
} else {
|
||||
printf("Attempted to suspend %zu processes\n", tokens.size() - 2);
|
||||
}
|
||||
printf("Attempted to suspend %zu processes\n", tokens.size() - 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1544,62 +1544,9 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
|
||||
if (tokencmp(tokens[0], "kill")) {
|
||||
getTransaction(db, managementTenant, tr, options, intrans);
|
||||
if (tokens.size() == 1) {
|
||||
state ThreadFuture<RangeResult> wInterfF =
|
||||
tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"),
|
||||
LiteralStringRef("\xff\xff/worker_interfaces0")),
|
||||
CLIENT_KNOBS->TOO_MANY);
|
||||
RangeResult kvs = wait(makeInterruptable(safeThreadFutureToFuture(wInterfF)));
|
||||
ASSERT(!kvs.more);
|
||||
auto connectLock = makeReference<FlowLock>(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM);
|
||||
std::vector<Future<Void>> addInterfs;
|
||||
for (auto it : kvs) {
|
||||
addInterfs.push_back(addInterface(&address_interface, connectLock, it));
|
||||
}
|
||||
wait(waitForAll(addInterfs));
|
||||
}
|
||||
if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
|
||||
if (address_interface.size() == 0) {
|
||||
printf("\nNo addresses can be killed.\n");
|
||||
} else if (address_interface.size() == 1) {
|
||||
printf("\nThe following address can be killed:\n");
|
||||
} else {
|
||||
printf("\nThe following %zu addresses can be killed:\n", address_interface.size());
|
||||
}
|
||||
for (auto it : address_interface) {
|
||||
printf("%s\n", printable(it.first).c_str());
|
||||
}
|
||||
printf("\n");
|
||||
} else if (tokencmp(tokens[1], "all")) {
|
||||
for (auto it : address_interface) {
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(it.second.first, IncludeVersion())
|
||||
.reboot.send(RebootRequest());
|
||||
}
|
||||
if (address_interface.size() == 0) {
|
||||
fprintf(stderr,
|
||||
"ERROR: no processes to kill. You must run the `kill’ command before "
|
||||
"running `kill all’.\n");
|
||||
} else {
|
||||
printf("Attempted to kill %zu processes\n", address_interface.size());
|
||||
}
|
||||
} else {
|
||||
for (int i = 1; i < tokens.size(); i++) {
|
||||
if (!address_interface.count(tokens[i])) {
|
||||
fprintf(stderr, "ERROR: process `%s' not recognized.\n", printable(tokens[i]).c_str());
|
||||
is_error = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_error) {
|
||||
for (int i = 1; i < tokens.size(); i++) {
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(address_interface[tokens[i]].first,
|
||||
IncludeVersion())
|
||||
.reboot.send(RebootRequest());
|
||||
}
|
||||
printf("Attempted to kill %zu processes\n", tokens.size() - 1);
|
||||
}
|
||||
}
|
||||
bool _result = wait(makeInterruptable(killCommandActor(db, tr, tokens, &address_interface)));
|
||||
if (!_result)
|
||||
is_error = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -153,7 +153,11 @@ public:
|
|||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
// Management API, attempt to kill or suspend a process, return 1 for request sent out, 0 for failure
|
||||
// Management API, attempt to kill or suspend a process, return 1 for request being sent out, 0 for failure
|
||||
// The address string can be extended to a comma-delimited string like <addr1>,<addr2>...,<addrN> to send reboot
|
||||
// requests to multiple processes simultaneously
|
||||
// If multiple addresses are provided, it returns 1 for requests being sent out to all provided addresses.
|
||||
// Conversely, if the client fails to connect to any one of the given addresses, no requests are sent out and 0 is returned
|
||||
virtual ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) = 0;
|
||||
// Management API, force the database to recover into DCID, causing the database to lose the most recently committed
|
||||
// mutations
|
||||
|
|
|
@ -890,8 +890,6 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
|||
state ClientLeaderRegInterface clientLeaderServer = clientLeaderServers[index];
|
||||
state OpenDatabaseCoordRequest req;
|
||||
|
||||
coordinator->set(clientLeaderServer);
|
||||
|
||||
req.clusterKey = cs.clusterKey();
|
||||
req.hostnames = cs.hostnames;
|
||||
req.coordinators = cs.coordinators();
|
||||
|
@ -922,16 +920,26 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
|||
incorrectTime = Optional<double>();
|
||||
}
|
||||
|
||||
state ErrorOr<CachedSerialization<ClientDBInfo>> rep;
|
||||
state Future<ErrorOr<CachedSerialization<ClientDBInfo>>> repFuture;
|
||||
if (clientLeaderServer.hostname.present()) {
|
||||
wait(store(rep,
|
||||
tryGetReplyFromHostname(req,
|
||||
clientLeaderServer.hostname.get(),
|
||||
WLTOKEN_CLIENTLEADERREG_OPENDATABASE,
|
||||
TaskPriority::CoordinationReply)));
|
||||
repFuture = tryGetReplyFromHostname(req,
|
||||
clientLeaderServer.hostname.get(),
|
||||
WLTOKEN_CLIENTLEADERREG_OPENDATABASE,
|
||||
TaskPriority::CoordinationReply);
|
||||
} else {
|
||||
wait(store(rep, clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply)));
|
||||
repFuture = clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply);
|
||||
}
|
||||
|
||||
// We need to update the coordinator even if it hasn't changed in case we are establishing a new connection in
|
||||
// FlowTransport. If so, setting the coordinator here forces protocol version monitoring to restart with the new
|
||||
// peer object.
|
||||
//
|
||||
// Both the tryGetReply call and the creation of the ClientLeaderRegInterface above should result in the Peer
|
||||
// object being created in FlowTransport. Having this peer is a prerequisite to us signaling the AsyncVar.
|
||||
coordinator->setUnconditional(clientLeaderServer);
|
||||
|
||||
state ErrorOr<CachedSerialization<ClientDBInfo>> rep = wait(repFuture);
|
||||
|
||||
if (rep.present()) {
|
||||
if (rep.get().read().forward.present()) {
|
||||
TraceEvent("MonitorProxiesForwarding")
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "boost/algorithm/string.hpp"
|
||||
#include "contrib/fmt-8.1.1/include/fmt/format.h"
|
||||
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
|
@ -6983,24 +6984,30 @@ ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
|
|||
coordinatorAddress = coordinator->get().get().getLeader.getEndpoint().getPrimaryAddress();
|
||||
}
|
||||
|
||||
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
|
||||
state Optional<Reference<AsyncVar<Optional<ProtocolVersion>> const>> protocolVersion =
|
||||
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
|
||||
|
||||
if (!protocolVersion.present()) {
|
||||
TraceEvent(SevWarnAlways, "GetCoordinatorProtocolPeerMissing").detail("Address", coordinatorAddress);
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
|
||||
return Optional<ProtocolVersion>();
|
||||
}
|
||||
|
||||
loop {
|
||||
if (protocolVersion->get().present() && protocolVersion->get() != expectedVersion) {
|
||||
return protocolVersion->get();
|
||||
if (protocolVersion.get()->get().present() && protocolVersion.get()->get() != expectedVersion) {
|
||||
return protocolVersion.get()->get();
|
||||
}
|
||||
|
||||
Future<Void> change = protocolVersion->onChange();
|
||||
if (!protocolVersion->get().present()) {
|
||||
Future<Void> change = protocolVersion.get()->onChange();
|
||||
if (!protocolVersion.get()->get().present()) {
|
||||
// If we still don't have any connection info after a timeout, retry sending the protocol version request
|
||||
change = timeout(change, FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT, Void());
|
||||
}
|
||||
|
||||
wait(change);
|
||||
|
||||
if (!protocolVersion->get().present()) {
|
||||
return protocolVersion->get();
|
||||
if (!protocolVersion.get()->get().present()) {
|
||||
return protocolVersion.get()->get();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8143,8 +8150,10 @@ ACTOR Future<std::vector<CheckpointMetaData>> getCheckpointMetaData(Database cx,
|
|||
|
||||
futures.clear();
|
||||
for (index = 0; index < locations.size(); ++index) {
|
||||
futures.push_back(getCheckpointMetaDataInternal(
|
||||
GetCheckpointRequest(version, keys, format), locations[index].locations, timeout));
|
||||
futures.push_back(
|
||||
getCheckpointMetaDataInternal(GetCheckpointRequest(version, locations[index].range, format),
|
||||
locations[index].locations,
|
||||
timeout));
|
||||
TraceEvent("GetCheckpointShardBegin")
|
||||
.detail("Range", locations[index].range)
|
||||
.detail("Version", version)
|
||||
|
@ -8259,57 +8268,74 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
|
|||
return (ddCheck && coordinatorCheck);
|
||||
}
|
||||
|
||||
ACTOR Future<Void> addInterfaceActor(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
|
||||
Reference<FlowLock> connectLock,
|
||||
KeyValue kv) {
|
||||
// returns true if we can connect to the given worker interface
|
||||
ACTOR Future<bool> verifyInterfaceActor(Reference<FlowLock> connectLock, ClientWorkerInterface workerInterf) {
|
||||
wait(connectLock->take());
|
||||
state FlowLock::Releaser releaser(*connectLock);
|
||||
state ClientWorkerInterface workerInterf =
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
|
||||
state ClientLeaderRegInterface leaderInterf(workerInterf.address());
|
||||
choose {
|
||||
when(Optional<LeaderInfo> rep =
|
||||
wait(brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())))) {
|
||||
StringRef ip_port =
|
||||
kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key;
|
||||
(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);
|
||||
|
||||
if (workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
|
||||
Key full_ip_port2 =
|
||||
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
|
||||
StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls"))
|
||||
? full_ip_port2.removeSuffix(LiteralStringRef(":tls"))
|
||||
: full_ip_port2;
|
||||
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
when(wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT))) {
|
||||
// NOTE : change timeout time here if necessary
|
||||
return false;
|
||||
}
|
||||
when(wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT))) {} // NOTE : change timeout time here if necessary
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<int64_t> rebootWorkerActor(DatabaseContext* cx, ValueRef addr, bool check, int duration) {
|
||||
// ignore negative value
|
||||
if (duration < 0)
|
||||
duration = 0;
|
||||
// fetch the addresses of all workers
|
||||
state std::map<Key, std::pair<Value, ClientLeaderRegInterface>> address_interface;
|
||||
if (!cx->getConnectionRecord())
|
||||
return 0;
|
||||
// fetch all workers' addresses and interfaces from CC
|
||||
RangeResult kvs = wait(getWorkerInterfaces(cx->getConnectionRecord()));
|
||||
ASSERT(!kvs.more);
|
||||
// map worker network address to its interface
|
||||
state std::map<Key, ClientWorkerInterface> workerInterfaces;
|
||||
for (const auto& it : kvs) {
|
||||
ClientWorkerInterface workerInterf =
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(it.value, IncludeVersion());
|
||||
Key primaryAddress =
|
||||
it.key.endsWith(LiteralStringRef(":tls")) ? it.key.removeSuffix(LiteralStringRef(":tls")) : it.key;
|
||||
workerInterfaces[primaryAddress] = workerInterf;
|
||||
// Also add a mapping from the worker's secondary address (if present) to its interface
|
||||
if (workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
|
||||
Key secondAddress =
|
||||
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
|
||||
secondAddress = secondAddress.endsWith(LiteralStringRef(":tls"))
|
||||
? secondAddress.removeSuffix(LiteralStringRef(":tls"))
|
||||
: secondAddress;
|
||||
workerInterfaces[secondAddress] = workerInterf;
|
||||
}
|
||||
}
|
||||
// split and get all the requested addresses to send reboot requests
|
||||
state std::vector<std::string> addressesVec;
|
||||
boost::algorithm::split(addressesVec, addr.toString(), boost::is_any_of(","));
|
||||
// Note: reuse this knob from fdbcli, change it if necessary
|
||||
Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
|
||||
std::vector<Future<Void>> addInterfs;
|
||||
for (const auto& it : kvs) {
|
||||
addInterfs.push_back(addInterfaceActor(&address_interface, connectLock, it));
|
||||
state std::vector<Future<bool>> verifyInterfs;
|
||||
for (const auto& requestedAddress : addressesVec) {
|
||||
// step 1: check that the requested address is in the worker list provided by CC
|
||||
if (!workerInterfaces.count(Key(requestedAddress)))
|
||||
return 0;
|
||||
// step 2: try to establish connections to the requested worker
|
||||
verifyInterfs.push_back(verifyInterfaceActor(connectLock, workerInterfaces[Key(requestedAddress)]));
|
||||
}
|
||||
// step 3: check if we can establish connections to all requested workers, return if not
|
||||
wait(waitForAll(verifyInterfs));
|
||||
for (const auto& f : verifyInterfs) {
|
||||
if (!f.get())
|
||||
return 0;
|
||||
}
|
||||
// step 4: After verifying we can connect to all requested workers, send reboot requests together
|
||||
for (const auto& address : addressesVec) {
|
||||
// Note: We want to make sure these requests are sent in parallel
|
||||
workerInterfaces[Key(address)].reboot.send(RebootRequest(false, check, duration));
|
||||
}
|
||||
wait(waitForAll(addInterfs));
|
||||
if (!address_interface.count(addr))
|
||||
return 0;
|
||||
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(address_interface[addr].first, IncludeVersion())
|
||||
.reboot.send(RebootRequest(false, check, duration));
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -8853,6 +8879,12 @@ ACTOR Future<KeyRange> getChangeFeedRange(Reference<DatabaseContext> db, Databas
|
|||
} else {
|
||||
Optional<Value> val = wait(tr.get(rangeIDKey));
|
||||
if (!val.present()) {
|
||||
ASSERT(tr.getReadVersion().isReady());
|
||||
TraceEvent(SevDebug, "ChangeFeedNotRegisteredGet")
|
||||
.detail("FeedID", rangeID)
|
||||
.detail("FullFeedKey", rangeIDKey)
|
||||
.detail("BeginVersion", begin)
|
||||
.detail("ReadVersion", tr.getReadVersion().get());
|
||||
throw change_feed_not_registered();
|
||||
}
|
||||
if (db->changeFeedCache.size() > CLIENT_KNOBS->CHANGE_FEED_CACHE_SIZE) {
|
||||
|
@ -9255,6 +9287,12 @@ ACTOR static Future<Void> popChangeFeedBackup(Database cx, Key rangeID, Version
|
|||
tr.set(rangeIDKey, changeFeedValue(range, version, status));
|
||||
}
|
||||
} else {
|
||||
ASSERT(tr.getReadVersion().isReady());
|
||||
TraceEvent(SevDebug, "ChangeFeedNotRegisteredPop")
|
||||
.detail("FeedID", rangeID)
|
||||
.detail("FullFeedKey", rangeIDKey)
|
||||
.detail("PopVersion", version)
|
||||
.detail("ReadVersion", tr.getReadVersion().get());
|
||||
throw change_feed_not_registered();
|
||||
}
|
||||
wait(tr.commit());
|
||||
|
|
|
@ -33,11 +33,12 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( VERSIONS_PER_SECOND, 1e6 );
|
||||
init( MAX_VERSIONS_IN_FLIGHT, 100 * VERSIONS_PER_SECOND );
|
||||
init( MAX_VERSIONS_IN_FLIGHT_FORCED, 6e5 * VERSIONS_PER_SECOND ); //one week of versions
|
||||
init( MAX_READ_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_READ_TRANSACTION_LIFE_VERSIONS = VERSIONS_PER_SECOND; else if (randomize && BUGGIFY) MAX_READ_TRANSACTION_LIFE_VERSIONS = std::max<int>(1, 0.1 * VERSIONS_PER_SECOND); else if( randomize && BUGGIFY ) MAX_READ_TRANSACTION_LIFE_VERSIONS = 10 * VERSIONS_PER_SECOND;
|
||||
init( ENABLE_VERSION_VECTOR, false );
|
||||
bool buggifyShortReadWindow = randomize && BUGGIFY && !ENABLE_VERSION_VECTOR;
|
||||
init( MAX_READ_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_READ_TRANSACTION_LIFE_VERSIONS = VERSIONS_PER_SECOND; else if (buggifyShortReadWindow) MAX_READ_TRANSACTION_LIFE_VERSIONS = std::max<int>(1, 0.1 * VERSIONS_PER_SECOND); else if( randomize && BUGGIFY ) MAX_READ_TRANSACTION_LIFE_VERSIONS = 10 * VERSIONS_PER_SECOND;
|
||||
init( MAX_WRITE_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_WRITE_TRANSACTION_LIFE_VERSIONS=std::max<int>(1, 1 * VERSIONS_PER_SECOND);
|
||||
init( MAX_COMMIT_BATCH_INTERVAL, 2.0 ); if( randomize && BUGGIFY ) MAX_COMMIT_BATCH_INTERVAL = 0.5; // Each commit proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly
|
||||
MAX_COMMIT_BATCH_INTERVAL = std::min(MAX_COMMIT_BATCH_INTERVAL, MAX_READ_TRANSACTION_LIFE_VERSIONS/double(2*VERSIONS_PER_SECOND)); // Ensure that the proxy commits 2 times every MAX_READ_TRANSACTION_LIFE_VERSIONS, otherwise the master will not give out versions fast enough
|
||||
init( ENABLE_VERSION_VECTOR, false );
|
||||
init( ENABLE_VERSION_VECTOR_TLOG_UNICAST, false );
|
||||
init( MAX_VERSION_RATE_MODIFIER, 0.1 );
|
||||
init( MAX_VERSION_RATE_OFFSET, VERSIONS_PER_SECOND ); // If the calculated version is more than this amount away from the expected version, it will be clamped to this value. This prevents huge version jumps.
|
||||
|
@ -464,7 +465,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( TXN_STATE_SEND_AMOUNT, 4 );
|
||||
init( REPORT_TRANSACTION_COST_ESTIMATION_DELAY, 0.1 );
|
||||
init( PROXY_REJECT_BATCH_QUEUED_TOO_LONG, true );
|
||||
init( PROXY_USE_RESOLVER_PRIVATE_MUTATIONS, false ); if( !ENABLE_VERSION_VECTOR_TLOG_UNICAST && randomize && BUGGIFY ) PROXY_USE_RESOLVER_PRIVATE_MUTATIONS = deterministicRandom()->coinflip();
|
||||
|
||||
bool buggfyUseResolverPrivateMutations = randomize && BUGGIFY && !ENABLE_VERSION_VECTOR_TLOG_UNICAST;
|
||||
init( PROXY_USE_RESOLVER_PRIVATE_MUTATIONS, false ); if( buggfyUseResolverPrivateMutations ) PROXY_USE_RESOLVER_PRIVATE_MUTATIONS = deterministicRandom()->coinflip();
|
||||
|
||||
init( RESET_MASTER_BATCHES, 200 );
|
||||
init( RESET_RESOLVER_BATCHES, 200 );
|
||||
|
@ -681,6 +684,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( FETCH_KEYS_PARALLELISM, 2 );
|
||||
init( FETCH_KEYS_LOWER_PRIORITY, 0 );
|
||||
init( FETCH_CHANGEFEED_PARALLELISM, 2 );
|
||||
init( SERVE_FETCH_CHECKPOINT_PARALLELISM, 4 );
|
||||
init( BUGGIFY_BLOCK_BYTES, 10000 );
|
||||
init( STORAGE_RECOVERY_VERSION_LAG_LIMIT, 2 * MAX_READ_TRANSACTION_LIFE_VERSIONS );
|
||||
init( STORAGE_COMMIT_BYTES, 10000000 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000;
|
||||
|
|
|
@ -632,6 +632,7 @@ public:
|
|||
int FETCH_KEYS_PARALLELISM;
|
||||
int FETCH_KEYS_LOWER_PRIORITY;
|
||||
int FETCH_CHANGEFEED_PARALLELISM;
|
||||
int SERVE_FETCH_CHECKPOINT_PARALLELISM;
|
||||
int BUGGIFY_BLOCK_BYTES;
|
||||
int64_t STORAGE_RECOVERY_VERSION_LAG_LIMIT;
|
||||
double STORAGE_DURABILITY_LAG_REJECT_THRESHOLD;
|
||||
|
|
|
@ -1911,8 +1911,14 @@ Reference<AsyncVar<bool>> FlowTransport::getDegraded() {
|
|||
//
|
||||
// Note that this function does not establish a connection to the peer. In order to obtain a peer's protocol
|
||||
// version, some other mechanism should be used to connect to that peer.
|
||||
Reference<AsyncVar<Optional<ProtocolVersion>> const> FlowTransport::getPeerProtocolAsyncVar(NetworkAddress addr) {
|
||||
return self->peers.at(addr)->protocolVersion;
|
||||
Optional<Reference<AsyncVar<Optional<ProtocolVersion>> const>> FlowTransport::getPeerProtocolAsyncVar(
|
||||
NetworkAddress addr) {
|
||||
auto itr = self->peers.find(addr);
|
||||
if (itr != self->peers.end()) {
|
||||
return itr->second->protocolVersion;
|
||||
} else {
|
||||
return Optional<Reference<AsyncVar<Optional<ProtocolVersion>> const>>();
|
||||
}
|
||||
}
|
||||
|
||||
void FlowTransport::resetConnection(NetworkAddress address) {
|
||||
|
|
|
@ -280,7 +280,7 @@ public:
|
|||
//
|
||||
// Note that this function does not establish a connection to the peer. In order to obtain a peer's protocol
|
||||
// version, some other mechanism should be used to connect to that peer.
|
||||
Reference<AsyncVar<Optional<ProtocolVersion>> const> getPeerProtocolAsyncVar(NetworkAddress addr);
|
||||
Optional<Reference<AsyncVar<Optional<ProtocolVersion>> const>> getPeerProtocolAsyncVar(NetworkAddress addr);
|
||||
|
||||
static FlowTransport& transport() {
|
||||
return *static_cast<FlowTransport*>((void*)g_network->global(INetwork::enFlowTransport));
|
||||
|
|
|
@ -186,6 +186,7 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted<BlobWorkerData> {
|
|||
Promise<Void> fatalError;
|
||||
|
||||
FlowLock initialSnapshotLock;
|
||||
bool shuttingDown = false;
|
||||
|
||||
int changeFeedStreamReplyBufferSize = SERVER_KNOBS->BG_DELTA_FILE_TARGET_BYTES / 2;
|
||||
|
||||
|
@ -703,7 +704,8 @@ ACTOR Future<BlobFileIndex> writeSnapshot(Reference<BlobWorkerData> bwData,
|
|||
ACTOR Future<BlobFileIndex> dumpInitialSnapshotFromFDB(Reference<BlobWorkerData> bwData,
|
||||
Reference<GranuleMetadata> metadata,
|
||||
UID granuleID,
|
||||
Key cfKey) {
|
||||
Key cfKey,
|
||||
std::deque<Future<Void>>* inFlightPops) {
|
||||
if (BW_DEBUG) {
|
||||
fmt::print("Dumping snapshot from FDB for [{0} - {1})\n",
|
||||
metadata->keyRange.begin.printable(),
|
||||
|
@ -742,7 +744,7 @@ ACTOR Future<BlobFileIndex> dumpInitialSnapshotFromFDB(Reference<BlobWorkerData>
|
|||
DEBUG_KEY_RANGE("BlobWorkerFDBSnapshot", readVersion, metadata->keyRange, bwData->id);
|
||||
|
||||
// initial snapshot is committed in fdb, we can pop the change feed up to this version
|
||||
bwData->addActor.send(bwData->db->popChangeFeedMutations(cfKey, readVersion));
|
||||
inFlightPops->push_back(bwData->db->popChangeFeedMutations(cfKey, readVersion));
|
||||
return snapshotWriter.get();
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_operation_cancelled) {
|
||||
|
@ -1000,7 +1002,8 @@ static void handleCompletedDeltaFile(Reference<BlobWorkerData> bwData,
|
|||
BlobFileIndex completedDeltaFile,
|
||||
Key cfKey,
|
||||
Version cfStartVersion,
|
||||
std::deque<std::pair<Version, Version>>* rollbacksCompleted) {
|
||||
std::deque<std::pair<Version, Version>>* rollbacksCompleted,
|
||||
std::deque<Future<Void>>& inFlightPops) {
|
||||
metadata->files.deltaFiles.push_back(completedDeltaFile);
|
||||
ASSERT(metadata->durableDeltaVersion.get() < completedDeltaFile.version);
|
||||
metadata->durableDeltaVersion.set(completedDeltaFile.version);
|
||||
|
@ -1018,7 +1021,7 @@ static void handleCompletedDeltaFile(Reference<BlobWorkerData> bwData,
|
|||
// an async pop at its previousDurableVersion after opening the granule to guarantee it is eventually popped?
|
||||
Future<Void> popFuture = bwData->db->popChangeFeedMutations(cfKey, completedDeltaFile.version);
|
||||
// Do pop asynchronously
|
||||
bwData->addActor.send(popFuture);
|
||||
inFlightPops.push_back(popFuture);
|
||||
}
|
||||
while (!rollbacksCompleted->empty() && completedDeltaFile.version >= rollbacksCompleted->front().second) {
|
||||
if (BW_DEBUG) {
|
||||
|
@ -1285,6 +1288,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
Reference<GranuleMetadata> metadata,
|
||||
Future<GranuleStartState> assignFuture) {
|
||||
state std::deque<InFlightFile> inFlightFiles;
|
||||
state std::deque<Future<Void>> inFlightPops;
|
||||
state Future<Void> oldChangeFeedFuture;
|
||||
state Future<Void> changeFeedFuture;
|
||||
state GranuleStartState startState;
|
||||
|
@ -1369,7 +1373,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
metadata->durableSnapshotVersion.set(startState.blobFilesToSnapshot.get().snapshotFiles.back().version);
|
||||
} else {
|
||||
ASSERT(startState.previousDurableVersion == invalidVersion);
|
||||
BlobFileIndex fromFDB = wait(dumpInitialSnapshotFromFDB(bwData, metadata, startState.granuleID, cfKey));
|
||||
BlobFileIndex fromFDB =
|
||||
wait(dumpInitialSnapshotFromFDB(bwData, metadata, startState.granuleID, cfKey, &inFlightPops));
|
||||
newSnapshotFile = fromFDB;
|
||||
ASSERT(startState.changeFeedStartVersion <= fromFDB.version);
|
||||
startVersion = newSnapshotFile.version;
|
||||
|
@ -1446,7 +1451,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
completedFile,
|
||||
cfKey,
|
||||
startState.changeFeedStartVersion,
|
||||
&rollbacksCompleted);
|
||||
&rollbacksCompleted,
|
||||
inFlightPops);
|
||||
}
|
||||
|
||||
inFlightFiles.pop_front();
|
||||
|
@ -1456,6 +1462,12 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
}
|
||||
}
|
||||
|
||||
// also check outstanding pops for errors
|
||||
while (!inFlightPops.empty() && inFlightPops.front().isReady()) {
|
||||
wait(inFlightPops.front());
|
||||
inFlightPops.pop_front();
|
||||
}
|
||||
|
||||
// inject delay into reading change feed stream
|
||||
if (BUGGIFY_WITH_PROB(0.001)) {
|
||||
wait(delay(deterministicRandom()->random01(), TaskPriority::BlobWorkerReadChangeFeed));
|
||||
|
@ -1849,7 +1861,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
completedFile,
|
||||
cfKey,
|
||||
startState.changeFeedStartVersion,
|
||||
&rollbacksCompleted);
|
||||
&rollbacksCompleted,
|
||||
inFlightPops);
|
||||
}
|
||||
|
||||
inFlightFiles.pop_front();
|
||||
|
@ -2928,6 +2941,10 @@ ACTOR Future<Void> handleRangeAssign(Reference<BlobWorkerData> bwData,
|
|||
return Void();
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_operation_cancelled) {
|
||||
if (!bwData->shuttingDown) {
|
||||
// the cancelled was because the granule open was cancelled, not because the whole blob worker was.
|
||||
req.reply.sendError(granule_assignment_conflict());
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
if (BW_DEBUG) {
|
||||
|
@ -3256,6 +3273,7 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
|
|||
}
|
||||
}
|
||||
when(wait(collection)) {
|
||||
self->shuttingDown = true;
|
||||
TraceEvent("BlobWorkerActorCollectionError", self->id);
|
||||
ASSERT(false);
|
||||
throw internal_error();
|
||||
|
@ -3273,6 +3291,7 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
|
|||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
self->shuttingDown = true;
|
||||
if (e.code() == error_code_operation_cancelled) {
|
||||
self->granuleMetadata.clear();
|
||||
throw;
|
||||
|
@ -3283,6 +3302,8 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
|
|||
TraceEvent("BlobWorkerDied", self->id).errorUnsuppressed(e);
|
||||
}
|
||||
|
||||
self->shuttingDown = true;
|
||||
|
||||
wait(self->granuleMetadata.clearAsync());
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -470,20 +470,20 @@ public:
|
|||
self->healthyZone.set(initTeams->initHealthyZoneValue);
|
||||
// SOMEDAY: If some servers have teams and not others (or some servers have more data than others) and there is
|
||||
// an address/locality collision, should we preferentially mark the least used server as undesirable?
|
||||
for (auto& server : initTeams->allServers) {
|
||||
if (self->shouldHandleServer(server.first)) {
|
||||
if (!self->isValidLocality(self->configuration.storagePolicy, server.first.locality)) {
|
||||
for (auto& [server, procClass] : initTeams->allServers) {
|
||||
if (self->shouldHandleServer(server)) {
|
||||
if (!self->isValidLocality(self->configuration.storagePolicy, server.locality)) {
|
||||
TraceEvent(SevWarnAlways, "MissingLocality")
|
||||
.detail("Server", server.first.uniqueID)
|
||||
.detail("Locality", server.first.locality.toString());
|
||||
auto addr = server.first.stableAddress();
|
||||
.detail("Server", server.uniqueID)
|
||||
.detail("Locality", server.locality.toString());
|
||||
auto addr = server.stableAddress();
|
||||
self->invalidLocalityAddr.insert(AddressExclusion(addr.ip, addr.port));
|
||||
if (self->checkInvalidLocalities.isReady()) {
|
||||
self->checkInvalidLocalities = checkAndRemoveInvalidLocalityAddr(self);
|
||||
self->addActor.send(self->checkInvalidLocalities);
|
||||
}
|
||||
}
|
||||
self->addServer(server.first, server.second, self->serverTrackerErrorOut, 0, *ddEnabledState);
|
||||
self->addServer(server, procClass, self->serverTrackerErrorOut, 0, *ddEnabledState);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -516,13 +516,14 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
for (auto i = self->server_info.begin(); i != self->server_info.end(); ++i) {
|
||||
if (!self->server_status.get(i->first).isUnhealthy()) {
|
||||
for (auto& [serverID, server] : self->server_info) {
|
||||
if (!self->server_status.get(serverID).isUnhealthy()) {
|
||||
++serverCount;
|
||||
LocalityData const& serverLocation = i->second->getLastKnownInterface().locality;
|
||||
LocalityData const& serverLocation = server->getLastKnownInterface().locality;
|
||||
machines.insert(serverLocation.zoneId());
|
||||
}
|
||||
}
|
||||
|
||||
uniqueMachines = machines.size();
|
||||
TraceEvent("BuildTeams", self->distributorId)
|
||||
.detail("ServerCount", self->server_info.size())
|
||||
|
@ -596,8 +597,8 @@ public:
|
|||
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
|
||||
int healthyMachineTeamCount = self->getHealthyMachineTeamCount();
|
||||
|
||||
std::pair<uint64_t, uint64_t> minMaxTeamsOnServer = self->calculateMinMaxServerTeamsOnServer();
|
||||
std::pair<uint64_t, uint64_t> minMaxMachineTeamsOnMachine =
|
||||
auto [minTeamsOnServer, maxTeamsOnServer] = self->calculateMinMaxServerTeamsOnServer();
|
||||
auto [minMachineTeamsOnMachine, maxMachineTeamsOnMachine] =
|
||||
self->calculateMinMaxMachineTeamsOnMachine();
|
||||
|
||||
TraceEvent("TeamCollectionInfo", self->distributorId)
|
||||
|
@ -613,10 +614,10 @@ public:
|
|||
.detail("DesiredMachineTeams", desiredMachineTeams)
|
||||
.detail("MaxMachineTeams", maxMachineTeams)
|
||||
.detail("TotalHealthyMachines", totalHealthyMachineCount)
|
||||
.detail("MinTeamsOnServer", minMaxTeamsOnServer.first)
|
||||
.detail("MaxTeamsOnServer", minMaxTeamsOnServer.second)
|
||||
.detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first)
|
||||
.detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second)
|
||||
.detail("MinTeamsOnServer", minTeamsOnServer)
|
||||
.detail("MaxTeamsOnServer", maxTeamsOnServer)
|
||||
// Report each bound under its own key: the bindings from
// calculateMinMaxMachineTeamsOnMachine() are {min, max}, in that order.
.detail("MinMachineTeamsOnMachine", minMachineTeamsOnMachine)
|
||||
.detail("MaxMachineTeamsOnMachine", maxMachineTeamsOnMachine)
|
||||
.detail("DoBuildTeams", self->doBuildTeams)
|
||||
.trackLatest(self->teamCollectionInfoEventHolder->trackingKey);
|
||||
}
|
||||
|
@ -3259,24 +3260,24 @@ void DDTeamCollection::traceServerInfo() const {
|
|||
int i = 0;
|
||||
|
||||
TraceEvent("ServerInfo", distributorId).detail("Size", server_info.size());
|
||||
for (auto& server : server_info) {
|
||||
for (auto& [serverID, server] : server_info) {
|
||||
TraceEvent("ServerInfo", distributorId)
|
||||
.detail("ServerInfoIndex", i++)
|
||||
.detail("ServerID", server.first.toString())
|
||||
.detail("ServerTeamOwned", server.second->getTeams().size())
|
||||
.detail("MachineID", server.second->machine->machineID.contents().toString())
|
||||
.detail("StoreType", server.second->getStoreType().toString())
|
||||
.detail("InDesiredDC", server.second->isInDesiredDC());
|
||||
.detail("ServerID", serverID.toString())
|
||||
.detail("ServerTeamOwned", server->getTeams().size())
|
||||
.detail("MachineID", server->machine->machineID.contents().toString())
|
||||
.detail("StoreType", server->getStoreType().toString())
|
||||
.detail("InDesiredDC", server->isInDesiredDC());
|
||||
}
|
||||
for (auto& server : server_info) {
|
||||
const UID& uid = server.first;
|
||||
for (auto& [serverID, server] : server_info) {
|
||||
TraceEvent("ServerStatus", distributorId)
|
||||
.detail("ServerID", uid)
|
||||
.detail("Healthy", !server_status.get(uid).isUnhealthy())
|
||||
.detail("MachineIsValid", get(server_info, uid)->machine.isValid())
|
||||
.detail("ServerID", serverID)
|
||||
.detail("Healthy", !server_status.get(serverID).isUnhealthy())
|
||||
.detail("MachineIsValid", get(server_info, serverID)->machine.isValid())
|
||||
.detail("MachineTeamSize",
|
||||
get(server_info, uid)->machine.isValid() ? get(server_info, uid)->machine->machineTeams.size()
|
||||
: -1);
|
||||
get(server_info, serverID)->machine.isValid()
|
||||
? get(server_info, serverID)->machine->machineTeams.size()
|
||||
: -1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3840,16 +3841,16 @@ void DDTeamCollection::addTeam(const std::vector<Reference<TCServerInfo>>& newTe
|
|||
|
||||
// For a good team, we add it to teams and create machine team for it when necessary
|
||||
teams.push_back(teamInfo);
|
||||
for (int i = 0; i < newTeamServers.size(); ++i) {
|
||||
newTeamServers[i]->addTeam(teamInfo);
|
||||
for (auto& server : newTeamServers) {
|
||||
server->addTeam(teamInfo);
|
||||
}
|
||||
|
||||
// Find or create machine team for the server team
|
||||
// Add the reference of machineTeam (with machineIDs) into process team
|
||||
std::vector<Standalone<StringRef>> machineIDs;
|
||||
for (auto server = newTeamServers.begin(); server != newTeamServers.end(); ++server) {
|
||||
ASSERT_WE_THINK((*server)->machine.isValid());
|
||||
machineIDs.push_back((*server)->machine->machineID);
|
||||
for (auto& server : newTeamServers) {
|
||||
ASSERT_WE_THINK(server->machine.isValid());
|
||||
machineIDs.push_back(server->machine->machineID);
|
||||
}
|
||||
sort(machineIDs.begin(), machineIDs.end());
|
||||
Reference<TCMachineTeamInfo> machineTeamInfo = findMachineTeam(machineIDs);
|
||||
|
@ -3909,9 +3910,9 @@ Reference<TCMachineTeamInfo> DDTeamCollection::addMachineTeam(std::vector<Standa
|
|||
|
||||
int DDTeamCollection::constructMachinesFromServers() {
|
||||
int totalServerIndex = 0;
|
||||
for (auto i = server_info.begin(); i != server_info.end(); ++i) {
|
||||
if (!server_status.get(i->first).isUnhealthy()) {
|
||||
checkAndCreateMachine(i->second);
|
||||
for (auto& [serverID, server] : server_info) {
|
||||
if (!server_status.get(serverID).isUnhealthy()) {
|
||||
checkAndCreateMachine(server);
|
||||
totalServerIndex++;
|
||||
}
|
||||
}
|
||||
|
@ -4023,26 +4024,26 @@ void DDTeamCollection::traceAllInfo(bool shouldPrint) const {
|
|||
void DDTeamCollection::rebuildMachineLocalityMap() {
|
||||
machineLocalityMap.clear();
|
||||
int numHealthyMachine = 0;
|
||||
for (auto machine = machine_info.begin(); machine != machine_info.end(); ++machine) {
|
||||
if (machine->second->serversOnMachine.empty()) {
|
||||
for (auto& [_, machine] : machine_info) {
|
||||
if (machine->serversOnMachine.empty()) {
|
||||
TraceEvent(SevWarn, "RebuildMachineLocalityMapError")
|
||||
.detail("Machine", machine->second->machineID.toString())
|
||||
.detail("Machine", machine->machineID.toString())
|
||||
.detail("NumServersOnMachine", 0);
|
||||
continue;
|
||||
}
|
||||
if (!isMachineHealthy(machine->second)) {
|
||||
if (!isMachineHealthy(machine)) {
|
||||
continue;
|
||||
}
|
||||
Reference<TCServerInfo> representativeServer = machine->second->serversOnMachine[0];
|
||||
Reference<TCServerInfo> representativeServer = machine->serversOnMachine[0];
|
||||
auto& locality = representativeServer->getLastKnownInterface().locality;
|
||||
if (!isValidLocality(configuration.storagePolicy, locality)) {
|
||||
TraceEvent(SevWarn, "RebuildMachineLocalityMapError")
|
||||
.detail("Machine", machine->second->machineID.toString())
|
||||
.detail("Machine", machine->machineID.toString())
|
||||
.detail("InvalidLocality", locality.toString());
|
||||
continue;
|
||||
}
|
||||
const LocalityEntry& localityEntry = machineLocalityMap.add(locality, &representativeServer->getId());
|
||||
machine->second->localityEntry = localityEntry;
|
||||
machine->localityEntry = localityEntry;
|
||||
++numHealthyMachine;
|
||||
}
|
||||
}
|
||||
|
@ -4422,12 +4423,12 @@ bool DDTeamCollection::notEnoughMachineTeamsForAMachine() const {
|
|||
SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS
|
||||
? (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2
|
||||
: SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER;
|
||||
for (auto& m : machine_info) {
|
||||
for (auto& [_, machine] : machine_info) {
|
||||
// If SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS is false,
|
||||
// The desired machine team number is not the same with the desired server team number
|
||||
// in notEnoughTeamsForAServer() below, because the machineTeamRemover() does not
|
||||
// remove a machine team with the most number of machine teams.
|
||||
if (m.second->machineTeams.size() < targetMachineTeamNumPerMachine && isMachineHealthy(m.second)) {
|
||||
if (machine->machineTeams.size() < targetMachineTeamNumPerMachine && isMachineHealthy(machine)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -4445,8 +4446,8 @@ bool DDTeamCollection::notEnoughTeamsForAServer() const {
|
|||
// (#servers * DESIRED_TEAMS_PER_SERVER * storageTeamSize) / #servers.
|
||||
int targetTeamNumPerServer = (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2;
|
||||
ASSERT_GT(targetTeamNumPerServer, 0);
|
||||
for (auto& s : server_info) {
|
||||
if (s.second->getTeams().size() < targetTeamNumPerServer && !server_status.get(s.first).isUnhealthy()) {
|
||||
for (auto& [serverID, server] : server_info) {
|
||||
if (server->getTeams().size() < targetTeamNumPerServer && !server_status.get(serverID).isUnhealthy()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -4653,11 +4654,11 @@ void DDTeamCollection::traceTeamCollectionInfo() const {
|
|||
void DDTeamCollection::noHealthyTeams() const {
|
||||
std::set<UID> desiredServerSet;
|
||||
std::string desc;
|
||||
for (auto i = server_info.begin(); i != server_info.end(); ++i) {
|
||||
ASSERT(i->first == i->second->getId());
|
||||
if (!server_status.get(i->first).isFailed) {
|
||||
desiredServerSet.insert(i->first);
|
||||
desc += i->first.shortString() + " (" + i->second->getLastKnownInterface().toString() + "), ";
|
||||
for (auto& [serverID, server] : server_info) {
|
||||
ASSERT(serverID == server->getId());
|
||||
if (!server_status.get(serverID).isFailed) {
|
||||
desiredServerSet.insert(serverID);
|
||||
desc += serverID.shortString() + " (" + server->getLastKnownInterface().toString() + "), ";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -390,8 +390,8 @@ bool canLaunchDest(const std::vector<std::pair<Reference<IDataDistributionTeam>,
|
|||
return true;
|
||||
}
|
||||
int workFactor = getDestWorkFactor();
|
||||
for (auto& team : candidateTeams) {
|
||||
for (UID id : team.first->getServerIDs()) {
|
||||
for (auto& [team, _] : candidateTeams) {
|
||||
for (UID id : team->getServerIDs()) {
|
||||
if (!busymapDest[id].canLaunch(priority, workFactor)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -413,8 +413,8 @@ void launchDest(RelocateData& relocation,
|
|||
std::map<UID, Busyness>& destBusymap) {
|
||||
ASSERT(relocation.completeDests.empty());
|
||||
int destWorkFactor = getDestWorkFactor();
|
||||
for (auto& team : candidateTeams) {
|
||||
for (UID id : team.first->getServerIDs()) {
|
||||
for (auto& [team, _] : candidateTeams) {
|
||||
for (UID id : team->getServerIDs()) {
|
||||
relocation.completeDests.push_back(id);
|
||||
destBusymap[id].addWork(relocation.priority, destWorkFactor);
|
||||
}
|
||||
|
|
|
@ -846,8 +846,10 @@ ACTOR Future<Void> rocksDBMetricLogger(std::shared_ptr<rocksdb::Statistics> stat
|
|||
}
|
||||
}
|
||||
|
||||
void logRocksDBError(const rocksdb::Status& status, const std::string& method) {
|
||||
auto level = status.IsTimedOut() ? SevWarn : SevError;
|
||||
void logRocksDBError(const rocksdb::Status& status,
|
||||
const std::string& method,
|
||||
Optional<Severity> sev = Optional<Severity>()) {
|
||||
Severity level = sev.present() ? sev.get() : (status.IsTimedOut() ? SevWarn : SevError);
|
||||
TraceEvent e(level, "RocksDBError");
|
||||
e.detail("Error", status.ToString()).detail("Method", method).detail("RocksDBSeverity", status.severity());
|
||||
if (status.IsIOError()) {
|
||||
|
@ -867,9 +869,28 @@ Error statusToError(const rocksdb::Status& s) {
|
|||
|
||||
struct RocksDBKeyValueStore : IKeyValueStore {
|
||||
struct Writer : IThreadPoolReceiver {
|
||||
// Action consumed by Writer::action(CheckpointAction&): carries the checkpoint
// request in, and the resulting CheckpointMetaData (or an error) out via `reply`.
struct CheckpointAction : TypedAction<Writer, CheckpointAction> {
|
||||
// Stores a copy of the request; it is immutable for the lifetime of the action.
CheckpointAction(const CheckpointRequest& request) : request(request) {}
|
||||
|
||||
// Returns the COMMIT_TIME_ESTIMATE server knob as this action's time estimate.
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
|
||||
|
||||
// What to checkpoint (version, range, format, checkpoint ID, target directory).
const CheckpointRequest request;
|
||||
// Fulfilled by the handler with the checkpoint metadata, or with an error.
ThreadReturnPromise<CheckpointMetaData> reply;
|
||||
};
|
||||
|
||||
// Action consumed by Writer::action(RestoreAction&): carries the checkpoints to
// restore from, and signals completion (or an error) through `done`.
struct RestoreAction : TypedAction<Writer, RestoreAction> {
|
||||
// Stores copies of both arguments; they are immutable for the lifetime of the action.
RestoreAction(const std::string& path, const std::vector<CheckpointMetaData>& checkpoints)
|
||||
: path(path), checkpoints(checkpoints) {}
|
||||
|
||||
// Returns the COMMIT_TIME_ESTIMATE server knob as this action's time estimate.
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
|
||||
|
||||
// Path associated with the restore; the handler includes it in its trace events.
const std::string path;
|
||||
// Checkpoints to restore from; the handler requires a non-empty list of one format.
const std::vector<CheckpointMetaData> checkpoints;
|
||||
// Fulfilled with Void() on success, or with an error if a RocksDB call fails.
ThreadReturnPromise<Void> done;
|
||||
};
|
||||
|
||||
DB& db;
|
||||
CF& cf;
|
||||
|
||||
UID id;
|
||||
std::shared_ptr<rocksdb::RateLimiter> rateLimiter;
|
||||
std::shared_ptr<ReadIteratorPool> readIterPool;
|
||||
|
@ -1153,127 +1174,9 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
a.done.send(Void());
|
||||
}
|
||||
|
||||
struct CheckpointAction : TypedAction<Writer, CheckpointAction> {
|
||||
CheckpointAction(const CheckpointRequest& request) : request(request) {}
|
||||
void action(CheckpointAction& a);
|
||||
|
||||
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
|
||||
|
||||
const CheckpointRequest request;
|
||||
ThreadReturnPromise<CheckpointMetaData> reply;
|
||||
};
|
||||
|
||||
void action(CheckpointAction& a) {
|
||||
TraceEvent("RocksDBServeCheckpointBegin", id)
|
||||
.detail("MinVersion", a.request.version)
|
||||
.detail("Range", a.request.range.toString())
|
||||
.detail("Format", static_cast<int>(a.request.format))
|
||||
.detail("CheckpointDir", a.request.checkpointDir);
|
||||
|
||||
rocksdb::Checkpoint* checkpoint;
|
||||
rocksdb::Status s = rocksdb::Checkpoint::Create(db, &checkpoint);
|
||||
if (!s.ok()) {
|
||||
logRocksDBError(s, "Checkpoint");
|
||||
a.reply.sendError(statusToError(s));
|
||||
return;
|
||||
}
|
||||
|
||||
rocksdb::PinnableSlice value;
|
||||
rocksdb::ReadOptions readOptions = getReadOptions();
|
||||
s = db->Get(readOptions, cf, toSlice(persistVersion), &value);
|
||||
|
||||
if (!s.ok() && !s.IsNotFound()) {
|
||||
logRocksDBError(s, "Checkpoint");
|
||||
a.reply.sendError(statusToError(s));
|
||||
return;
|
||||
}
|
||||
|
||||
const Version version = s.IsNotFound()
|
||||
? latestVersion
|
||||
: BinaryReader::fromStringRef<Version>(toStringRef(value), Unversioned());
|
||||
|
||||
TraceEvent("RocksDBServeCheckpointVersion", id)
|
||||
.detail("CheckpointVersion", a.request.version)
|
||||
.detail("PersistVersion", version);
|
||||
|
||||
// TODO: set the range as the actual shard range.
|
||||
CheckpointMetaData res(version, a.request.range, a.request.format, a.request.checkpointID);
|
||||
const std::string& checkpointDir = a.request.checkpointDir;
|
||||
|
||||
if (a.request.format == RocksDBColumnFamily) {
|
||||
rocksdb::ExportImportFilesMetaData* pMetadata;
|
||||
platform::eraseDirectoryRecursive(checkpointDir);
|
||||
const std::string cwd = platform::getWorkingDirectory() + "/";
|
||||
s = checkpoint->ExportColumnFamily(cf, checkpointDir, &pMetadata);
|
||||
|
||||
if (!s.ok()) {
|
||||
logRocksDBError(s, "Checkpoint");
|
||||
a.reply.sendError(statusToError(s));
|
||||
return;
|
||||
}
|
||||
|
||||
populateMetaData(&res, *pMetadata);
|
||||
delete pMetadata;
|
||||
TraceEvent("RocksDBServeCheckpointSuccess", id)
|
||||
.detail("CheckpointMetaData", res.toString())
|
||||
.detail("RocksDBCF", getRocksCF(res).toString());
|
||||
} else {
|
||||
throw not_implemented();
|
||||
}
|
||||
|
||||
res.setState(CheckpointMetaData::Complete);
|
||||
a.reply.send(res);
|
||||
}
|
||||
|
||||
struct RestoreAction : TypedAction<Writer, RestoreAction> {
|
||||
RestoreAction(const std::string& path, const std::vector<CheckpointMetaData>& checkpoints)
|
||||
: path(path), checkpoints(checkpoints) {}
|
||||
|
||||
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
|
||||
|
||||
const std::string path;
|
||||
const std::vector<CheckpointMetaData> checkpoints;
|
||||
ThreadReturnPromise<Void> done;
|
||||
};
|
||||
|
||||
void action(RestoreAction& a) {
|
||||
TraceEvent("RocksDBServeRestoreBegin", id).detail("Path", a.path);
|
||||
|
||||
// TODO: Fail gracefully.
|
||||
ASSERT(!a.checkpoints.empty());
|
||||
|
||||
if (a.checkpoints[0].format == RocksDBColumnFamily) {
|
||||
ASSERT_EQ(a.checkpoints.size(), 1);
|
||||
TraceEvent("RocksDBServeRestoreCF", id)
|
||||
.detail("Path", a.path)
|
||||
.detail("Checkpoint", a.checkpoints[0].toString())
|
||||
.detail("RocksDBCF", getRocksCF(a.checkpoints[0]).toString());
|
||||
|
||||
auto options = getOptions();
|
||||
rocksdb::Status status = rocksdb::DB::Open(options, a.path, &db);
|
||||
|
||||
if (!status.ok()) {
|
||||
logRocksDBError(status, "Restore");
|
||||
a.done.sendError(statusToError(status));
|
||||
return;
|
||||
}
|
||||
|
||||
rocksdb::ExportImportFilesMetaData metaData = getMetaData(a.checkpoints[0]);
|
||||
rocksdb::ImportColumnFamilyOptions importOptions;
|
||||
importOptions.move_files = true;
|
||||
status = db->CreateColumnFamilyWithImport(
|
||||
getCFOptions(), SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, importOptions, metaData, &cf);
|
||||
|
||||
if (!status.ok()) {
|
||||
logRocksDBError(status, "Restore");
|
||||
a.done.sendError(statusToError(status));
|
||||
} else {
|
||||
TraceEvent(SevInfo, "RocksDB").detail("Path", a.path).detail("Method", "Restore");
|
||||
a.done.send(Void());
|
||||
}
|
||||
} else {
|
||||
throw not_implemented();
|
||||
}
|
||||
}
|
||||
void action(RestoreAction& a);
|
||||
};
|
||||
|
||||
struct Reader : IThreadPoolReceiver {
|
||||
|
@ -2043,6 +1946,171 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
}
|
||||
};
|
||||
|
||||
// Handles a CheckpointAction: creates an on-disk checkpoint of the store in the
// requested format and answers a.reply with the resulting CheckpointMetaData, or
// with an error if a RocksDB call fails.
//
// The version recorded in the metadata is the value stored under persistVersion,
// or latestVersion when that key is absent. The request must ask for exactly that
// version or for latestVersion.
//  - RocksDBColumnFamily: the column family is exported into the checkpoint
//    directory and the export metadata is copied into the result.
//  - RocksDB: a full checkpoint is created in the checkpoint directory and its
//    .sst files are listed in the serialized RocksDBCheckpoint.
// Any other format throws not_implemented().
void RocksDBKeyValueStore::Writer::action(CheckpointAction& a) {
	TraceEvent("RocksDBServeCheckpointBegin", id)
	    .detail("MinVersion", a.request.version)
	    .detail("Range", a.request.range.toString())
	    .detail("Format", static_cast<int>(a.request.format))
	    .detail("CheckpointDir", a.request.checkpointDir);

	// Checkpoint::Create allocates the Checkpoint object and leaves deletion to the
	// caller; owning it through unique_ptr releases it on every exit path
	// (early error returns, the not_implemented throw, and normal completion).
	rocksdb::Checkpoint* rawCheckpoint = nullptr;
	rocksdb::Status s = rocksdb::Checkpoint::Create(db, &rawCheckpoint);
	const std::unique_ptr<rocksdb::Checkpoint> checkpoint(rawCheckpoint);
	if (!s.ok()) {
		logRocksDBError(s, "Checkpoint");
		a.reply.sendError(statusToError(s));
		return;
	}

	rocksdb::PinnableSlice value;
	rocksdb::ReadOptions readOptions = getReadOptions();
	s = db->Get(readOptions, cf, toSlice(persistVersion), &value);

	// A missing persistVersion key is not an error; it maps to latestVersion below.
	if (!s.ok() && !s.IsNotFound()) {
		logRocksDBError(s, "Checkpoint");
		a.reply.sendError(statusToError(s));
		return;
	}

	const Version version =
	    s.IsNotFound() ? latestVersion : BinaryReader::fromStringRef<Version>(toStringRef(value), Unversioned());

	ASSERT(a.request.version == version || a.request.version == latestVersion);
	TraceEvent(SevDebug, "RocksDBServeCheckpointVersion", id)
	    .detail("CheckpointVersion", a.request.version)
	    .detail("PersistVersion", version);

	// TODO: set the range as the actual shard range.
	CheckpointMetaData res(version, a.request.range, a.request.format, a.request.checkpointID);
	// Held by value rather than as a reference bound to abspath()'s result.
	const std::string checkpointDir = abspath(a.request.checkpointDir);

	if (a.request.format == RocksDBColumnFamily) {
		rocksdb::ExportImportFilesMetaData* rawMetadata = nullptr;
		platform::eraseDirectoryRecursive(checkpointDir);
		s = checkpoint->ExportColumnFamily(cf, checkpointDir, &rawMetadata);
		// The export metadata is ours to free; unique_ptr also covers a throw
		// from populateMetaData().
		const std::unique_ptr<rocksdb::ExportImportFilesMetaData> pMetadata(rawMetadata);
		if (!s.ok()) {
			logRocksDBError(s, "ExportColumnFamily");
			a.reply.sendError(statusToError(s));
			return;
		}

		populateMetaData(&res, *pMetadata);
		TraceEvent("RocksDBServeCheckpointSuccess", id)
		    .detail("CheckpointMetaData", res.toString())
		    .detail("RocksDBCF", getRocksCF(res).toString());
	} else if (a.request.format == RocksDB) {
		platform::eraseDirectoryRecursive(checkpointDir);
		// Only used for tracing; stays at its sentinel if RocksDB does not set it.
		uint64_t debugCheckpointSeq = -1;
		s = checkpoint->CreateCheckpoint(checkpointDir, /*log_size_for_flush=*/0, &debugCheckpointSeq);
		if (!s.ok()) {
			logRocksDBError(s, "Checkpoint");
			a.reply.sendError(statusToError(s));
			return;
		}

		RocksDBCheckpoint rcp;
		rcp.checkpointDir = checkpointDir;
		rcp.sstFiles = platform::listFiles(checkpointDir, ".sst");
		res.serializedCheckpoint = ObjectWriter::toValue(rcp, IncludeVersion());
		TraceEvent("RocksDBCheckpointCreated", id)
		    .detail("CheckpointVersion", a.request.version)
		    .detail("RocksSequenceNumber", debugCheckpointSeq)
		    .detail("CheckpointDir", checkpointDir);
	} else {
		throw not_implemented();
	}

	res.setState(CheckpointMetaData::Complete);
	a.reply.send(res);
}
|
||||
|
||||
// Handles a RestoreAction: loads the given checkpoints into this store and
// reports the outcome through a.done.
//
// All checkpoints must share one format, otherwise invalid_checkpoint_format()
// is thrown.
//  - RocksDBColumnFamily: exactly one checkpoint is expected; an existing column
//    family is dropped and a new one is created by importing the checkpoint.
//  - RocksDB: the column family is created if it does not exist yet, then every
//    fetched file of every checkpoint is ingested.
// Any other format throws not_implemented().
void RocksDBKeyValueStore::Writer::action(RestoreAction& a) {
	TraceEvent("RocksDBRestoreBegin", id).detail("Path", a.path).detail("Checkpoints", describe(a.checkpoints));

	ASSERT(db != nullptr);
	ASSERT(!a.checkpoints.empty());

	// The first checkpoint decides the format; every other one has to agree.
	const CheckpointFormat format = a.checkpoints[0].getFormat();
	for (int idx = 1; idx < a.checkpoints.size(); ++idx) {
		if (a.checkpoints[idx].getFormat() != format) {
			throw invalid_checkpoint_format();
		}
	}

	// Reject unsupported formats up front; nothing has been modified yet.
	if (format != RocksDBColumnFamily && format != RocksDB) {
		throw not_implemented();
	}

	if (format == RocksDBColumnFamily) {
		ASSERT_EQ(a.checkpoints.size(), 1);
		TraceEvent("RocksDBServeRestoreCF", id)
		    .detail("Path", a.path)
		    .detail("Checkpoint", a.checkpoints[0].toString())
		    .detail("RocksDBCF", getRocksCF(a.checkpoints[0]).toString());

		// The import creates the column family, so an existing one goes first.
		if (cf != nullptr) {
			ASSERT(db->DropColumnFamily(cf).ok());
		}

		rocksdb::ExportImportFilesMetaData metaData = getMetaData(a.checkpoints[0]);
		rocksdb::ImportColumnFamilyOptions importOptions;
		importOptions.move_files = true;
		const rocksdb::Status importStatus = db->CreateColumnFamilyWithImport(
		    getCFOptions(), SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, importOptions, metaData, &cf);

		if (importStatus.ok()) {
			TraceEvent(SevInfo, "RocksDBRestoreCFSuccess")
			    .detail("Path", a.path)
			    .detail("Checkpoint", a.checkpoints[0].toString());
			a.done.send(Void());
		} else {
			logRocksDBError(importStatus, "Restore");
			a.done.sendError(statusToError(importStatus));
		}
		return;
	}

	// From here on the format is RocksDB.
	if (cf == nullptr) {
		const rocksdb::Status createStatus =
		    db->CreateColumnFamily(getCFOptions(), SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, &cf);
		TraceEvent("RocksDBServeRestoreRange", id)
		    .detail("Path", a.path)
		    .detail("Checkpoint", describe(a.checkpoints));
		if (!createStatus.ok()) {
			logRocksDBError(createStatus, "CreateColumnFamily");
			a.done.sendError(statusToError(createStatus));
			return;
		}
	}

	// Gather the paths of all fetched files across all checkpoints.
	std::vector<std::string> ingestPaths;
	for (const auto& meta : a.checkpoints) {
		const RocksDBCheckpoint rocksCheckpoint = getRocksCheckpoint(meta);
		for (const auto& fetched : rocksCheckpoint.fetchedFiles) {
			TraceEvent("RocksDBRestoreFile", id)
			    .detail("Checkpoint", rocksCheckpoint.toString())
			    .detail("File", fetched.toString());
			ingestPaths.push_back(fetched.path);
		}
	}

	if (ingestPaths.empty()) {
		// Nothing to ingest; still counts as a successful restore.
		TraceEvent(SevDebug, "RocksDBServeRestoreEmptyRange", id)
		    .detail("Path", a.path)
		    .detail("Checkpoint", describe(a.checkpoints));
	} else {
		rocksdb::IngestExternalFileOptions ingestOptions;
		ingestOptions.move_files = true;
		ingestOptions.write_global_seqno = false;
		ingestOptions.verify_checksums_before_ingest = true;
		const rocksdb::Status ingestStatus = db->IngestExternalFile(cf, ingestPaths, ingestOptions);
		if (!ingestStatus.ok()) {
			logRocksDBError(ingestStatus, "IngestExternalFile", SevWarnAlways);
			a.done.sendError(statusToError(ingestStatus));
			return;
		}
	}

	TraceEvent("RocksDBServeRestoreEnd", id).detail("Path", a.path).detail("Checkpoint", describe(a.checkpoints));
	a.done.send(Void());
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // SSD_ROCKSDB_EXPERIMENTAL
|
||||
|
@ -2155,7 +2223,7 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/RocksDBReopen") {
|
|||
return Void();
|
||||
}
|
||||
|
||||
TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestore") {
|
||||
TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestoreColumnFamily") {
|
||||
state std::string cwd = platform::getWorkingDirectory() + "/";
|
||||
state std::string rocksDBTestDir = "rocksdb-kvstore-br-test-db";
|
||||
platform::eraseDirectoryRecursive(rocksDBTestDir);
|
||||
|
@ -2169,6 +2237,13 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestore") {
|
|||
Optional<Value> val = wait(kvStore->readValue(LiteralStringRef("foo")));
|
||||
ASSERT(Optional<Value>(LiteralStringRef("bar")) == val);
|
||||
|
||||
state std::string rocksDBRestoreDir = "rocksdb-kvstore-br-restore-db";
|
||||
platform::eraseDirectoryRecursive(rocksDBRestoreDir);
|
||||
|
||||
state IKeyValueStore* kvStoreCopy =
|
||||
new RocksDBKeyValueStore(rocksDBRestoreDir, deterministicRandom()->randomUniqueID());
|
||||
wait(kvStoreCopy->init());
|
||||
|
||||
platform::eraseDirectoryRecursive("checkpoint");
|
||||
state std::string checkpointDir = cwd + "checkpoint";
|
||||
|
||||
|
@ -2176,12 +2251,6 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestore") {
|
|||
latestVersion, allKeys, RocksDBColumnFamily, deterministicRandom()->randomUniqueID(), checkpointDir);
|
||||
CheckpointMetaData metaData = wait(kvStore->checkpoint(request));
|
||||
|
||||
state std::string rocksDBRestoreDir = "rocksdb-kvstore-br-restore-db";
|
||||
platform::eraseDirectoryRecursive(rocksDBRestoreDir);
|
||||
|
||||
state IKeyValueStore* kvStoreCopy =
|
||||
new RocksDBKeyValueStore(rocksDBRestoreDir, deterministicRandom()->randomUniqueID());
|
||||
|
||||
std::vector<CheckpointMetaData> checkpoints;
|
||||
checkpoints.push_back(metaData);
|
||||
wait(kvStoreCopy->restore(checkpoints));
|
||||
|
@ -2202,11 +2271,52 @@ TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestore") {
|
|||
return Void();
|
||||
}
|
||||
|
||||
TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/RocksDBTypes") {
|
||||
// If the following assertion fails, update SstFileMetaData and LiveFileMetaData in RocksDBCheckpointUtils.actor.h
|
||||
// to be the same as rocksdb::SstFileMetaData and rocksdb::LiveFileMetaData.
|
||||
ASSERT_EQ(sizeof(rocksdb::LiveFileMetaData), 184);
|
||||
ASSERT_EQ(sizeof(rocksdb::ExportImportFilesMetaData), 32);
|
||||
// Checks that a RocksDB-format checkpoint can be streamed back as key-value pairs: writes a single key,
// checkpoints the store, reads the range ["foo", "foobar") through an ICheckpointReader and compares every
// returned pair against the live store.
TEST_CASE("noSim/fdbserver/KeyValueStoreRocksDB/CheckpointRestoreKeyValues") {
	state std::string cwd = platform::getWorkingDirectory() + "/";
	state std::string rocksDBTestDir = "rocksdb-kvstore-brsst-test-db";
	platform::eraseDirectoryRecursive(rocksDBTestDir);
	state IKeyValueStore* kvStore = new RocksDBKeyValueStore(rocksDBTestDir, deterministicRandom()->randomUniqueID());
	wait(kvStore->init());

	kvStore->set({ LiteralStringRef("foo"), LiteralStringRef("bar") });
	wait(kvStore->commit(false));
	Optional<Value> val = wait(kvStore->readValue(LiteralStringRef("foo")));
	ASSERT(Optional<Value>(LiteralStringRef("bar")) == val);

	platform::eraseDirectoryRecursive("checkpoint");
	std::string checkpointDir = cwd + "checkpoint";

	CheckpointRequest request(latestVersion, allKeys, RocksDB, deterministicRandom()->randomUniqueID(), checkpointDir);
	CheckpointMetaData metaData = wait(kvStore->checkpoint(request));

	state ICheckpointReader* cpReader = newCheckpointReader(metaData, deterministicRandom()->randomUniqueID());
	wait(cpReader->init(BinaryWriter::toValue(KeyRangeRef("foo"_sr, "foobar"_sr), IncludeVersion())));
	loop {
		try {
			state RangeResult res =
			    wait(cpReader->nextKeyValues(CLIENT_KNOBS->REPLY_BYTE_LIMIT, CLIENT_KNOBS->REPLY_BYTE_LIMIT));
			state int i = 0;
			for (; i < res.size(); ++i) {
				Optional<Value> val = wait(kvStore->readValue(res[i].key));
				ASSERT(val.present() && val.get() == res[i].value);
			}
		} catch (Error& e) {
			if (e.code() == error_code_end_of_stream) {
				// The reader signals exhaustion of the range with end_of_stream.
				break;
			} else {
				TraceEvent(SevError, "TestFailed").error(e);
				// Any other error is a test failure. Rethrow instead of looping again: calling the reader
				// after it failed would either repeat the failure forever or touch a released iterator.
				throw e;
			}
		}
	}

	std::vector<Future<Void>> closes;
	closes.push_back(cpReader->close());
	closes.push_back(kvStore->onClosed());
	kvStore->close();
	wait(waitForAll(closes));

	platform::eraseDirectoryRecursive(rocksDBTestDir);

	return Void();
}
|
||||
|
||||
|
|
|
@ -20,32 +20,394 @@
|
|||
|
||||
#include "fdbserver/RocksDBCheckpointUtils.actor.h"
|
||||
|
||||
#ifdef SSD_ROCKSDB_EXPERIMENTAL
|
||||
#include <rocksdb/db.h>
|
||||
#include <rocksdb/env.h>
|
||||
#include <rocksdb/options.h>
|
||||
#include <rocksdb/slice.h>
|
||||
#include <rocksdb/slice_transform.h>
|
||||
#include <rocksdb/types.h>
|
||||
#include <rocksdb/version.h>
|
||||
#endif // SSD_ROCKSDB_EXPERIMENTAL
|
||||
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/StorageCheckpoint.h"
|
||||
#include "fdbserver/CoroFlow.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/IThreadPool.h"
|
||||
#include "flow/ThreadHelper.actor.h"
|
||||
#include "flow/Trace.h"
|
||||
#include "flow/flow.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // has to be last include
|
||||
|
||||
#ifdef SSD_ROCKSDB_EXPERIMENTAL
|
||||
// Enforcing rocksdb version to be 6.22.1 or greater.
|
||||
static_assert(ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22 && ROCKSDB_PATCH >= 1,
|
||||
"Unsupported rocksdb version. Update the rocksdb to at least 6.22.1 version");
|
||||
|
||||
namespace {
|
||||
|
||||
using DB = rocksdb::DB*;
|
||||
using CF = rocksdb::ColumnFamilyHandle*;
|
||||
|
||||
const KeyRef persistVersion = "\xff\xffVersion"_sr;
|
||||
|
||||
// Builds a rocksdb::Slice over the bytes referenced by `s`.
rocksdb::Slice toSlice(StringRef s) {
	const char* bytes = reinterpret_cast<const char*>(s.begin());
	return rocksdb::Slice(bytes, s.size());
}
|
||||
|
||||
// Builds a StringRef over the bytes referenced by the rocksdb slice `s`.
StringRef toStringRef(rocksdb::Slice s) {
	const uint8_t* bytes = reinterpret_cast<const uint8_t*>(s.data());
	return StringRef(bytes, s.size());
}
|
||||
|
||||
// Column family options used by the checkpoint reader: a default-constructed ColumnFamilyOptions.
rocksdb::ColumnFamilyOptions getCFOptions() {
	return rocksdb::ColumnFamilyOptions();
}
|
||||
|
||||
// DB options used to open a checkpoint: never create a missing DB, and write RocksDB's own log files to
// the directory configured by the LOG_DIRECTORY server knob.
rocksdb::Options getOptions() {
	rocksdb::Options dbOptions({}, getCFOptions());
	dbOptions.db_log_dir = SERVER_KNOBS->LOG_DIRECTORY;
	dbOptions.create_if_missing = false;
	return dbOptions;
}
|
||||
|
||||
// Set some useful defaults desired for all reads.
|
||||
rocksdb::ReadOptions getReadOptions() {
|
||||
rocksdb::ReadOptions options;
|
||||
options.background_purge_on_iterator_cleanup = true;
|
||||
return options;
|
||||
}
|
||||
|
||||
// Emits a "RocksDBCheckpointReaderError" trace event for a failed RocksDB call.
// Timeouts are logged at SevWarn, everything else at SevError; I/O errors also carry the status subcode.
//   status: the failing rocksdb::Status.
//   method: name of the operation that produced `status`.
void logRocksDBError(const rocksdb::Status& status, const std::string& method) {
	TraceEvent e(status.IsTimedOut() ? SevWarn : SevError, "RocksDBCheckpointReaderError");
	e.detail("Error", status.ToString());
	e.detail("Method", method);
	e.detail("RocksDBSeverity", status.severity());
	if (status.IsIOError()) {
		e.detail("SubCode", status.subcode());
	}
}
|
||||
|
||||
// Maps a rocksdb::Status to a flow Error: I/O errors become io_error, timeouts become
// transaction_too_old, and any other status becomes unknown_error.
Error statusToError(const rocksdb::Status& s) {
	if (s.IsIOError()) {
		return io_error();
	}
	if (s.IsTimedOut()) {
		return transaction_too_old();
	}
	return unknown_error();
}
|
||||
|
||||
// RocksDBCheckpointReader reads a RocksDB checkpoint, and returns the key-value pairs via nextKeyValues.
|
||||
class RocksDBCheckpointReader : public ICheckpointReader {
|
||||
public:
|
||||
RocksDBCheckpointReader(const CheckpointMetaData& checkpoint, UID logID)
|
||||
RocksDBCheckpointReader(const CheckpointMetaData& checkpoint, UID logID);
|
||||
|
||||
Future<Void> init(StringRef token) override;
|
||||
|
||||
Future<RangeResult> nextKeyValues(const int rowLimit, const int byteLimit) override;
|
||||
|
||||
Future<Standalone<StringRef>> nextChunk(const int byteLimit) { throw not_implemented(); }
|
||||
|
||||
Future<Void> close() { return doClose(this); }
|
||||
|
||||
private:
|
||||
struct Reader : IThreadPoolReceiver {
|
||||
struct OpenAction : TypedAction<Reader, OpenAction> {
|
||||
OpenAction(std::string path, KeyRange range, Version version)
|
||||
: path(std::move(path)), range(range), version(version) {}
|
||||
|
||||
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
|
||||
|
||||
const std::string path;
|
||||
const KeyRange range;
|
||||
const Version version;
|
||||
ThreadReturnPromise<Void> done;
|
||||
};
|
||||
|
||||
struct CloseAction : TypedAction<Reader, CloseAction> {
|
||||
CloseAction(std::string path, bool deleteOnClose) : path(path), deleteOnClose(deleteOnClose) {}
|
||||
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
|
||||
|
||||
std::string path;
|
||||
bool deleteOnClose;
|
||||
ThreadReturnPromise<Void> done;
|
||||
};
|
||||
|
||||
struct ReadRangeAction : TypedAction<Reader, ReadRangeAction>, FastAllocated<ReadRangeAction> {
|
||||
ReadRangeAction(int rowLimit, int byteLimit)
|
||||
: rowLimit(rowLimit), byteLimit(byteLimit), startTime(timer_monotonic()) {}
|
||||
|
||||
double getTimeEstimate() const override { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; }
|
||||
|
||||
const int rowLimit, byteLimit;
|
||||
const double startTime;
|
||||
ThreadReturnPromise<RangeResult> result;
|
||||
};
|
||||
|
||||
explicit Reader(DB& db);
|
||||
~Reader() override {}
|
||||
|
||||
void init() override {}
|
||||
|
||||
void action(OpenAction& a);
|
||||
|
||||
void action(CloseAction& a);
|
||||
|
||||
void action(ReadRangeAction& a);
|
||||
|
||||
DB& db;
|
||||
CF cf;
|
||||
Key begin;
|
||||
Key end;
|
||||
double readRangeTimeout;
|
||||
std::unique_ptr<rocksdb::Iterator> cursor;
|
||||
};
|
||||
|
||||
ACTOR static Future<Void> doClose(RocksDBCheckpointReader* self);
|
||||
|
||||
DB db = nullptr;
|
||||
std::string path;
|
||||
const UID id;
|
||||
Version version;
|
||||
Reference<IThreadPool> readThreads;
|
||||
Future<Void> openFuture;
|
||||
};
|
||||
|
||||
// Records the checkpoint directory and version, then creates the thread pool (a coroutine-based pool in
// simulation, a generic one otherwise) with a single Reader thread bound to this object's `db`.
RocksDBCheckpointReader::RocksDBCheckpointReader(const CheckpointMetaData& checkpoint, UID logID)
  : id(logID), version(checkpoint.version) {
	this->path = getRocksCheckpoint(checkpoint).checkpointDir;

	const bool simulated = g_network->isSimulated();
	if (!simulated) {
		readThreads = createGenericThreadPool();
	} else {
		readThreads = CoroThreadPool::createThreadPool();
	}
	readThreads->addThread(new Reader(db), "fdb-rocks-rd");
}
|
||||
|
||||
// Posts an OpenAction for the key range serialized in `token` (IncludeVersion) and returns its future.
// If an open was already requested, the future of that first request is returned and `token` is ignored.
Future<Void> RocksDBCheckpointReader::init(StringRef token) {
	if (!openFuture.isValid()) {
		const KeyRange requested = BinaryReader::fromStringRef<KeyRange>(token, IncludeVersion());
		auto openAction = std::make_unique<Reader::OpenAction>(this->path, requested, this->version);
		openFuture = openAction->done.getFuture();
		// Ownership of the action passes to the thread pool.
		readThreads->post(openAction.release());
	}
	return openFuture;
}
|
||||
|
||||
// Posts a ReadRangeAction with the given limits to the reader thread and returns the future of its result.
Future<RangeResult> RocksDBCheckpointReader::nextKeyValues(const int rowLimit, const int byteLimit) {
	auto readAction = std::make_unique<Reader::ReadRangeAction>(rowLimit, byteLimit);
	auto pending = readAction->result.getFuture();
	// Ownership of the action passes to the thread pool.
	readThreads->post(readAction.release());
	return pending;
}
|
||||
|
||||
// Binds the reader to the owner's DB handle and picks the read-range timeout.
RocksDBCheckpointReader::Reader::Reader(DB& db) : db(db), cf(nullptr) {
	if (!g_network->isSimulated()) {
		readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT;
		return;
	}
	// In simulation the read timeout is raised to 5 minutes: some tests generate very high load, and the
	// single read thread cannot process all of it within the regular timeout.
	readRangeTimeout = 5 * 60;
}
|
||||
|
||||
// Opens the checkpoint at `a.path` read-only with all of its column families, selects the FDB column
// family, verifies the persisted version against `a.version` and positions the cursor at `a.range.begin`.
// Failures of OpenForReadOnly or of the version read are logged and reported through `a.done`.
void RocksDBCheckpointReader::Reader::action(RocksDBCheckpointReader::Reader::OpenAction& a) {
	ASSERT(cf == nullptr);

	rocksdb::Options options = getOptions();
	std::vector<std::string> cfNames;
	rocksdb::Status status = rocksdb::DB::ListColumnFamilies(options, a.path, &cfNames);
	// The "default" column family must always be part of the open request.
	const bool hasDefault = std::find(cfNames.begin(), cfNames.end(), "default") != cfNames.end();
	if (!hasDefault) {
		cfNames.push_back("default");
	}

	rocksdb::ColumnFamilyOptions cfOptions = getCFOptions();
	std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
	for (const std::string& cfName : cfNames) {
		descriptors.emplace_back(cfName, cfOptions);
	}

	std::vector<rocksdb::ColumnFamilyHandle*> handles;
	status = rocksdb::DB::OpenForReadOnly(options, a.path, descriptors, &handles, &db);
	if (!status.ok()) {
		logRocksDBError(status, "OpenForReadOnly");
		a.done.sendError(statusToError(status));
		return;
	}

	// Pick the first handle whose name matches the configured FDB column family.
	auto match = std::find_if(handles.begin(), handles.end(), [](rocksdb::ColumnFamilyHandle* handle) {
		return handle->GetName() == SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY;
	});
	if (match != handles.end()) {
		cf = *match;
	}

	ASSERT(db != nullptr && cf != nullptr);

	begin = a.range.begin;
	end = a.range.end;

	TraceEvent(SevInfo, "RocksDBCheckpointReaderInit")
	    .detail("Path", a.path)
	    .detail("Method", "OpenForReadOnly")
	    .detail("ColumnFamily", cf->GetName())
	    .detail("Begin", begin)
	    .detail("End", end);

	rocksdb::ReadOptions readOptions = getReadOptions();
	rocksdb::PinnableSlice persisted;
	status = db->Get(readOptions, cf, toSlice(persistVersion), &persisted);
	if (!status.ok() && !status.IsNotFound()) {
		logRocksDBError(status, "Checkpoint");
		a.done.sendError(statusToError(status));
		return;
	}

	// A checkpoint without a persisted version key is treated as being at latestVersion.
	Version checkpointVersion = latestVersion;
	if (!status.IsNotFound()) {
		checkpointVersion = BinaryReader::fromStringRef<Version>(toStringRef(persisted), Unversioned());
	}
	ASSERT(checkpointVersion == a.version);

	cursor = std::unique_ptr<rocksdb::Iterator>(db->NewIterator(readOptions, cf));
	cursor->Seek(toSlice(begin));

	a.done.send(Void());
}
|
||||
|
||||
// Closes the checkpoint DB and, when `a.deleteOnClose` is set, destroys the on-disk checkpoint at `a.path`.
// Close/Destroy failures are logged but not propagated; `a.done` is always fulfilled with Void.
void RocksDBCheckpointReader::Reader::action(RocksDBCheckpointReader::Reader::CloseAction& a) {
	if (db == nullptr) {
		// Nothing was opened, so there is nothing to close.
		a.done.send(Void());
		return;
	}

	// Release the iterator before closing: close() may be requested before the range was read to the end,
	// and an iterator must not outlive the DB it was created from.
	cursor.reset();

	rocksdb::Status s = db->Close();
	if (!s.ok()) {
		logRocksDBError(s, "Close");
	}

	if (a.deleteOnClose) {
		std::set<std::string> columnFamilies{ "default" };
		columnFamilies.insert(SERVER_KNOBS->DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY);
		std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
		for (const std::string& name : columnFamilies) {
			descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ name, getCFOptions() });
		}
		s = rocksdb::DestroyDB(a.path, getOptions(), descriptors);
		if (!s.ok()) {
			logRocksDBError(s, "Destroy");
		} else {
			TraceEvent("RocksDBCheckpointReader").detail("Path", a.path).detail("Method", "Destroy");
		}
	}

	TraceEvent("RocksDBCheckpointReader").detail("Path", a.path).detail("Method", "Close");
	a.done.send(Void());
}
|
||||
|
||||
// Reads forward from the cursor until `end`, the row limit or the byte limit is reached, and sends the
// batch through `a.result`.
//  - A request that already waited longer than readRangeTimeout fails with timed_out.
//  - A request that exceeds the timeout while reading fails with transaction_too_old.
//  - An iterator error is logged and converted with statusToError.
//  - An empty batch is reported as end_of_stream.
// The cursor is released on every failure inside or after the read loop and on end_of_stream.
void RocksDBCheckpointReader::Reader::action(RocksDBCheckpointReader::Reader::ReadRangeAction& a) {
	const auto traceTimeout = [this]() {
		TraceEvent(SevWarn, "RocksDBCheckpointReaderError")
		    .detail("Error", "Read range request timedout")
		    .detail("Method", "ReadRangeAction")
		    .detail("Timeout value", readRangeTimeout);
	};

	if (timer_monotonic() - a.startTime > readRangeTimeout) {
		traceTimeout();
		a.result.sendError(timed_out());
		return;
	}

	RangeResult batch;
	if (a.rowLimit == 0 || a.byteLimit == 0) {
		a.result.send(batch);
		return;
	}

	// For now, only forward scan is supported.
	ASSERT(a.rowLimit > 0);

	int bytesSoFar = 0;
	while (cursor->Valid() && toStringRef(cursor->key()) < end) {
		const KeyValueRef entry(toStringRef(cursor->key()), toStringRef(cursor->value()));
		bytesSoFar += sizeof(KeyValueRef) + entry.expectedSize();
		batch.push_back_deep(batch.arena(), entry);
		cursor->Next();

		const bool limitReached = batch.size() >= a.rowLimit || bytesSoFar >= a.byteLimit;
		if (limitReached) {
			break;
		}
		if (timer_monotonic() - a.startTime > readRangeTimeout) {
			traceTimeout();
			a.result.sendError(transaction_too_old());
			cursor.reset();
			return;
		}
	}

	const rocksdb::Status iterStatus = cursor->status();
	if (!iterStatus.ok()) {
		logRocksDBError(iterStatus, "ReadRange");
		a.result.sendError(statusToError(iterStatus));
		cursor.reset();
		return;
	}

	if (!batch.empty()) {
		a.result.send(batch);
		return;
	}

	// Nothing left in the requested range.
	cursor.reset();
	a.result.sendError(end_of_stream());
}
|
||||
|
||||
// Closes the checkpoint on the reader thread (without deleting its files), stops the thread pool and then
// deletes `self`. A null `self` is a no-op.
ACTOR Future<Void> RocksDBCheckpointReader::doClose(RocksDBCheckpointReader* self) {
	if (self == nullptr)
		return Void();

	auto closeAction = new RocksDBCheckpointReader::Reader::CloseAction(self->path, false);
	auto closed = closeAction->done.getFuture();
	self->readThreads->post(closeAction);
	wait(closed);

	// `self` was checked above and is never reassigned, so it is still non-null here.
	wait(self->readThreads->stop());
	delete self;

	return Void();
}
|
||||
|
||||
// RocksDBCFCheckpointReader reads an exported RocksDB Column Family checkpoint, and returns the serialized
|
||||
// checkpoint via nextChunk.
|
||||
class RocksDBCFCheckpointReader : public ICheckpointReader {
|
||||
public:
|
||||
RocksDBCFCheckpointReader(const CheckpointMetaData& checkpoint, UID logID)
|
||||
: checkpoint_(checkpoint), id_(logID), file_(Reference<IAsyncFile>()), offset_(0) {}
|
||||
|
||||
Future<Void> init(StringRef token) override;
|
||||
|
||||
Future<RangeResult> nextKeyValues(const int rowLimit, const int byteLimit) override { throw not_implemented(); }
|
||||
|
||||
// Returns the next chunk of serialized checkpoint.
|
||||
Future<Standalone<StringRef>> nextChunk(const int byteLimit) override;
|
||||
|
||||
Future<Void> close() override;
|
||||
|
||||
private:
|
||||
ACTOR static Future<Void> doInit(RocksDBCheckpointReader* self) {
|
||||
ACTOR static Future<Void> doInit(RocksDBCFCheckpointReader* self) {
|
||||
ASSERT(self != nullptr);
|
||||
try {
|
||||
state Reference<IAsyncFile> _file = wait(IAsyncFileSystem::filesystem()->open(
|
||||
|
@ -62,7 +424,7 @@ private:
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Standalone<StringRef>> getNextChunk(RocksDBCheckpointReader* self, int byteLimit) {
|
||||
ACTOR static Future<Standalone<StringRef>> getNextChunk(RocksDBCFCheckpointReader* self, int byteLimit) {
|
||||
int blockSize = std::min(64 * 1024, byteLimit); // Block size read from disk.
|
||||
state Standalone<StringRef> buf = makeAlignedString(_PAGE_SIZE, blockSize);
|
||||
int bytesRead = wait(self->file_->read(mutateString(buf), blockSize, self->offset_));
|
||||
|
@ -74,7 +436,7 @@ private:
|
|||
return buf.substr(0, bytesRead);
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> doClose(RocksDBCheckpointReader* self) {
|
||||
ACTOR static Future<Void> doClose(RocksDBCFCheckpointReader* self) {
|
||||
wait(delay(0, TaskPriority::FetchKeys));
|
||||
delete self;
|
||||
return Void();
|
||||
|
@ -87,7 +449,7 @@ private:
|
|||
std::string path_;
|
||||
};
|
||||
|
||||
Future<Void> RocksDBCheckpointReader::init(StringRef token) {
|
||||
Future<Void> RocksDBCFCheckpointReader::init(StringRef token) {
|
||||
ASSERT_EQ(this->checkpoint_.getFormat(), RocksDBColumnFamily);
|
||||
const std::string name = token.toString();
|
||||
this->offset_ = 0;
|
||||
|
@ -108,11 +470,11 @@ Future<Void> RocksDBCheckpointReader::init(StringRef token) {
|
|||
return doInit(this);
|
||||
}
|
||||
|
||||
Future<Standalone<StringRef>> RocksDBCheckpointReader::nextChunk(const int byteLimit) {
|
||||
Future<Standalone<StringRef>> RocksDBCFCheckpointReader::nextChunk(const int byteLimit) {
|
||||
return getNextChunk(this, byteLimit);
|
||||
}
|
||||
|
||||
Future<Void> RocksDBCheckpointReader::close() {
|
||||
Future<Void> RocksDBCFCheckpointReader::close() {
|
||||
return doClose(this);
|
||||
}
|
||||
|
||||
|
@ -216,13 +578,163 @@ ACTOR Future<Void> fetchCheckpointFile(Database cx,
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Return when a file exceeds a limit.
|
||||
ACTOR Future<Void> fetchCheckpointRange(Database cx,
|
||||
std::shared_ptr<CheckpointMetaData> metaData,
|
||||
KeyRange range,
|
||||
std::string dir,
|
||||
std::shared_ptr<rocksdb::SstFileWriter> writer,
|
||||
std::function<Future<Void>(const CheckpointMetaData&)> cFun,
|
||||
int maxRetries = 3) {
|
||||
state std::string localFile = dir + "/" + metaData->checkpointID.toString() + ".sst";
|
||||
RocksDBCheckpoint rcp = getRocksCheckpoint(*metaData);
|
||||
TraceEvent("FetchCheckpointRange")
|
||||
.detail("InitialState", metaData->toString())
|
||||
.detail("RocksCheckpoint", rcp.toString());
|
||||
|
||||
for (const auto& file : rcp.fetchedFiles) {
|
||||
ASSERT(!file.range.intersects(range));
|
||||
}
|
||||
|
||||
state UID ssID = metaData->ssID;
|
||||
state Transaction tr(cx);
|
||||
state StorageServerInterface ssi;
|
||||
loop {
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
Optional<Value> ss = wait(tr.get(serverListKeyFor(ssID)));
|
||||
if (!ss.present()) {
|
||||
TraceEvent(SevWarnAlways, "FetchCheckpointRangeStorageServerNotFound")
|
||||
.detail("SSID", ssID)
|
||||
.detail("InitialState", metaData->toString());
|
||||
throw checkpoint_not_found();
|
||||
}
|
||||
ssi = decodeServerListValue(ss.get());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(ssi.id() == ssID);
|
||||
|
||||
state int attempt = 0;
|
||||
state int64_t totalBytes = 0;
|
||||
state rocksdb::Status status;
|
||||
state Optional<Error> error;
|
||||
loop {
|
||||
totalBytes = 0;
|
||||
++attempt;
|
||||
try {
|
||||
TraceEvent(SevInfo, "FetchCheckpointRangeBegin")
|
||||
.detail("CheckpointID", metaData->checkpointID)
|
||||
.detail("Range", range.toString())
|
||||
.detail("TargetStorageServerUID", ssID)
|
||||
.detail("LocalFile", localFile)
|
||||
.detail("Attempt", attempt)
|
||||
.log();
|
||||
|
||||
wait(IAsyncFileSystem::filesystem()->deleteFile(localFile, true));
|
||||
status = writer->Open(localFile);
|
||||
if (!status.ok()) {
|
||||
Error e = statusToError(status);
|
||||
TraceEvent(SevError, "FetchCheckpointRangeOpenFileError")
|
||||
.detail("LocalFile", localFile)
|
||||
.detail("Status", status.ToString());
|
||||
throw e;
|
||||
}
|
||||
|
||||
state ReplyPromiseStream<FetchCheckpointKeyValuesStreamReply> stream =
|
||||
ssi.fetchCheckpointKeyValues.getReplyStream(
|
||||
FetchCheckpointKeyValuesRequest(metaData->checkpointID, range));
|
||||
TraceEvent(SevDebug, "FetchCheckpointKeyValuesReceivingData")
|
||||
.detail("CheckpointID", metaData->checkpointID)
|
||||
.detail("Range", range.toString())
|
||||
.detail("TargetStorageServerUID", ssID.toString())
|
||||
.detail("LocalFile", localFile)
|
||||
.detail("Attempt", attempt)
|
||||
.log();
|
||||
|
||||
loop {
|
||||
FetchCheckpointKeyValuesStreamReply rep = waitNext(stream.getFuture());
|
||||
for (int i = 0; i < rep.data.size(); ++i) {
|
||||
status = writer->Put(toSlice(rep.data[i].key), toSlice(rep.data[i].value));
|
||||
if (!status.ok()) {
|
||||
Error e = statusToError(status);
|
||||
TraceEvent(SevError, "FetchCheckpointRangeWriteError")
|
||||
.detail("LocalFile", localFile)
|
||||
.detail("Key", rep.data[i].key.toString())
|
||||
.detail("Value", rep.data[i].value.toString())
|
||||
.detail("Status", status.ToString());
|
||||
throw e;
|
||||
}
|
||||
totalBytes += rep.data[i].expectedSize();
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
Error err = e;
|
||||
if (totalBytes > 0) {
|
||||
status = writer->Finish();
|
||||
if (!status.ok()) {
|
||||
err = statusToError(status);
|
||||
}
|
||||
}
|
||||
if (err.code() != error_code_end_of_stream) {
|
||||
TraceEvent(SevWarn, "FetchCheckpointFileError")
|
||||
.errorUnsuppressed(err)
|
||||
.detail("CheckpointID", metaData->checkpointID)
|
||||
.detail("Range", range.toString())
|
||||
.detail("TargetStorageServerUID", ssID.toString())
|
||||
.detail("LocalFile", localFile)
|
||||
.detail("Attempt", attempt);
|
||||
if (attempt >= maxRetries) {
|
||||
error = err;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (totalBytes > 0) {
|
||||
RocksDBCheckpoint rcp = getRocksCheckpoint(*metaData);
|
||||
rcp.fetchedFiles.emplace_back(localFile, range, totalBytes);
|
||||
rcp.checkpointDir = dir;
|
||||
metaData->serializedCheckpoint = ObjectWriter::toValue(rcp, IncludeVersion());
|
||||
}
|
||||
if (!fileExists(localFile)) {
|
||||
TraceEvent(SevWarn, "FetchCheckpointRangeEndFileNotFound")
|
||||
.detail("CheckpointID", metaData->checkpointID)
|
||||
.detail("Range", range.toString())
|
||||
.detail("TargetStorageServerUID", ssID.toString())
|
||||
.detail("LocalFile", localFile)
|
||||
.detail("Attempt", attempt)
|
||||
.detail("TotalBytes", totalBytes);
|
||||
} else {
|
||||
TraceEvent(SevInfo, "FetchCheckpointRangeEnd")
|
||||
.detail("CheckpointID", metaData->checkpointID)
|
||||
.detail("Range", range.toString())
|
||||
.detail("TargetStorageServerUID", ssID.toString())
|
||||
.detail("LocalFile", localFile)
|
||||
.detail("Attempt", attempt)
|
||||
.detail("TotalBytes", totalBytes);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (error.present()) {
|
||||
throw error.get();
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
ACTOR Future<CheckpointMetaData> fetchRocksDBCheckpoint(Database cx,
|
||||
CheckpointMetaData initialState,
|
||||
std::string dir,
|
||||
std::function<Future<Void>(const CheckpointMetaData&)> cFun) {
|
||||
TraceEvent("FetchRocksCheckpointBegin")
|
||||
TraceEvent(SevInfo, "FetchRocksCheckpointBegin")
|
||||
.detail("InitialState", initialState.toString())
|
||||
.detail("CheckpointDir", dir);
|
||||
|
||||
|
@ -230,50 +742,101 @@ ACTOR Future<CheckpointMetaData> fetchRocksDBCheckpoint(Database cx,
|
|||
|
||||
if (metaData->format == RocksDBColumnFamily) {
|
||||
state RocksDBColumnFamilyCheckpoint rocksCF = getRocksCF(initialState);
|
||||
TraceEvent("RocksDBCheckpointMetaData").detail("RocksCF", rocksCF.toString());
|
||||
TraceEvent(SevDebug, "RocksDBCheckpointMetaData").detail("RocksCF", rocksCF.toString());
|
||||
|
||||
state int i = 0;
|
||||
state std::vector<Future<Void>> fs;
|
||||
for (; i < rocksCF.sstFiles.size(); ++i) {
|
||||
fs.push_back(fetchCheckpointFile(cx, metaData, i, dir, cFun));
|
||||
TraceEvent("GetCheckpointFetchingFile")
|
||||
TraceEvent(SevDebug, "GetCheckpointFetchingFile")
|
||||
.detail("FileName", rocksCF.sstFiles[i].name)
|
||||
.detail("Server", metaData->ssID.toString());
|
||||
}
|
||||
wait(waitForAll(fs));
|
||||
} else {
|
||||
throw not_implemented();
|
||||
} else if (metaData->format == RocksDB) {
|
||||
std::shared_ptr<rocksdb::SstFileWriter> writer =
|
||||
std::make_shared<rocksdb::SstFileWriter>(rocksdb::EnvOptions(), rocksdb::Options());
|
||||
wait(fetchCheckpointRange(cx, metaData, metaData->range, dir, writer, cFun));
|
||||
}
|
||||
|
||||
return *metaData;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> deleteRocksCFCheckpoint(CheckpointMetaData checkpoint) {
|
||||
ASSERT_EQ(checkpoint.getFormat(), RocksDBColumnFamily);
|
||||
RocksDBColumnFamilyCheckpoint rocksCF = getRocksCF(checkpoint);
|
||||
TraceEvent("DeleteRocksColumnFamilyCheckpoint", checkpoint.checkpointID)
|
||||
.detail("CheckpointID", checkpoint.checkpointID)
|
||||
.detail("RocksCF", rocksCF.toString());
|
||||
|
||||
ACTOR Future<Void> deleteRocksCheckpoint(CheckpointMetaData checkpoint) {
|
||||
state CheckpointFormat format = checkpoint.getFormat();
|
||||
state std::unordered_set<std::string> dirs;
|
||||
for (const LiveFileMetaData& file : rocksCF.sstFiles) {
|
||||
dirs.insert(file.db_path);
|
||||
if (format == RocksDBColumnFamily) {
|
||||
RocksDBColumnFamilyCheckpoint rocksCF = getRocksCF(checkpoint);
|
||||
TraceEvent(SevInfo, "DeleteRocksColumnFamilyCheckpoint", checkpoint.checkpointID)
|
||||
.detail("CheckpointID", checkpoint.checkpointID)
|
||||
.detail("RocksCF", rocksCF.toString());
|
||||
|
||||
for (const LiveFileMetaData& file : rocksCF.sstFiles) {
|
||||
dirs.insert(file.db_path);
|
||||
}
|
||||
} else if (format == RocksDB) {
|
||||
RocksDBCheckpoint rocksCheckpoint = getRocksCheckpoint(checkpoint);
|
||||
TraceEvent(SevInfo, "DeleteRocksCheckpoint", checkpoint.checkpointID)
|
||||
.detail("CheckpointID", checkpoint.checkpointID)
|
||||
.detail("RocksCheckpoint", rocksCheckpoint.toString());
|
||||
dirs.insert(rocksCheckpoint.checkpointDir);
|
||||
} else {
|
||||
ASSERT(false);
|
||||
}
|
||||
|
||||
state std::unordered_set<std::string>::iterator it = dirs.begin();
|
||||
for (; it != dirs.end(); ++it) {
|
||||
const std::string dir = *it;
|
||||
platform::eraseDirectoryRecursive(dir);
|
||||
TraceEvent("DeleteCheckpointRemovedDir", checkpoint.checkpointID)
|
||||
TraceEvent(SevInfo, "DeleteCheckpointRemovedDir", checkpoint.checkpointID)
|
||||
.detail("CheckpointID", checkpoint.checkpointID)
|
||||
.detail("Dir", dir);
|
||||
wait(delay(0, TaskPriority::FetchKeys));
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
#else
|
||||
// Stub compiled when SSD_ROCKSDB_EXPERIMENTAL is not defined: nothing is fetched. After a delay(0),
// `initialState` is returned unchanged; `cx`, `dir` and `cFun` are unused.
ACTOR Future<CheckpointMetaData> fetchRocksDBCheckpoint(Database cx,
|
||||
CheckpointMetaData initialState,
|
||||
std::string dir,
|
||||
std::function<Future<Void>(const CheckpointMetaData&)> cFun) {
|
||||
wait(delay(0));
|
||||
return initialState;
|
||||
}
|
||||
|
||||
// Stub compiled when SSD_ROCKSDB_EXPERIMENTAL is not defined: nothing is deleted. Completes after a
// delay(0); `checkpoint` is unused.
ACTOR Future<Void> deleteRocksCheckpoint(CheckpointMetaData checkpoint) {
|
||||
wait(delay(0));
|
||||
return Void();
|
||||
}
|
||||
#endif // SSD_ROCKSDB_EXPERIMENTAL
|
||||
|
||||
int64_t getTotalFetchedBytes(const std::vector<CheckpointMetaData>& checkpoints) {
|
||||
int64_t totalBytes = 0;
|
||||
for (const auto& checkpoint : checkpoints) {
|
||||
const CheckpointFormat format = checkpoint.getFormat();
|
||||
if (format == RocksDBColumnFamily) {
|
||||
// TODO: Returns the checkpoint size of a RocksDB Column Family.
|
||||
} else if (format == RocksDB) {
|
||||
auto rcp = getRocksCheckpoint(checkpoint);
|
||||
for (const auto& file : rcp.fetchedFiles) {
|
||||
totalBytes += file.size;
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalBytes;
|
||||
}
|
||||
|
||||
// Creates the checkpoint reader matching the format of `checkpoint`: a RocksDBCFCheckpointReader for
// RocksDBColumnFamily, a RocksDBCheckpointReader for RocksDB. Returns nullptr for any other format, or
// when the binary is built without SSD_ROCKSDB_EXPERIMENTAL.
ICheckpointReader* newRocksDBCheckpointReader(const CheckpointMetaData& checkpoint, UID logID) {
#ifdef SSD_ROCKSDB_EXPERIMENTAL
	const CheckpointFormat format = checkpoint.getFormat();
	if (format == RocksDBColumnFamily) {
		return new RocksDBCFCheckpointReader(checkpoint, logID);
	} else if (format == RocksDB) {
		return new RocksDBCheckpointReader(checkpoint, logID);
	}
#endif // SSD_ROCKSDB_EXPERIMENTAL
	return nullptr;
}
|
||||
|
||||
RocksDBColumnFamilyCheckpoint getRocksCF(const CheckpointMetaData& checkpoint) {
|
||||
|
@ -281,4 +844,11 @@ RocksDBColumnFamilyCheckpoint getRocksCF(const CheckpointMetaData& checkpoint) {
|
|||
ObjectReader reader(checkpoint.serializedCheckpoint.begin(), IncludeVersion());
|
||||
reader.deserialize(rocksCF);
|
||||
return rocksCF;
|
||||
}
|
||||
|
||||
// Deserializes the RocksDBCheckpoint stored in `checkpoint.serializedCheckpoint` (IncludeVersion format).
RocksDBCheckpoint getRocksCheckpoint(const CheckpointMetaData& checkpoint) {
	RocksDBCheckpoint decoded;
	ObjectReader in(checkpoint.serializedCheckpoint.begin(), IncludeVersion());
	in.deserialize(decoded);
	return decoded;
}
|
|
@ -31,6 +31,26 @@
|
|||
|
||||
#include "flow/actorcompiler.h" // has to be last include
|
||||
|
||||
struct CheckpointFile {
|
||||
constexpr static FileIdentifier file_identifier = 13804348;
|
||||
std::string path;
|
||||
KeyRange range;
|
||||
int64_t size; // Logical bytes of the checkpoint.
|
||||
|
||||
CheckpointFile() = default;
|
||||
CheckpointFile(std::string path, KeyRange range, int64_t size) : path(path), range(range), size(size) {}
|
||||
|
||||
std::string toString() const {
|
||||
return "CheckpointFile:\nFile Name: " + this->path + "\nRange: " + range.toString() +
|
||||
"\nSize: " + std::to_string(size) + "\n";
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, path, range, size);
|
||||
}
|
||||
};
|
||||
|
||||
// Copied from rocksdb/metadata.h, so that we can add serializer.
|
||||
struct SstFileMetaData {
|
||||
constexpr static FileIdentifier file_identifier = 3804347;
|
||||
|
@ -193,6 +213,34 @@ struct RocksDBColumnFamilyCheckpoint {
|
|||
}
|
||||
};
|
||||
|
||||
// Checkpoint metadata associated with RocksDB format.
|
||||
// The checkpoint is created via rocksdb::CreateCheckpoint().
|
||||
struct RocksDBCheckpoint {
|
||||
constexpr static FileIdentifier file_identifier = 13804347;
|
||||
std::string checkpointDir; // Checkpoint directory on the storage server.
|
||||
std::vector<std::string> sstFiles; // All checkpoint files.
|
||||
std::vector<CheckpointFile> fetchedFiles; // Used for fetchCheckpoint, to record the progress.
|
||||
|
||||
CheckpointFormat format() const { return RocksDB; }
|
||||
|
||||
std::string toString() const {
|
||||
std::string res = "RocksDBCheckpoint:\nCheckpoint dir: " + checkpointDir + "\nFiles: ";
|
||||
for (const std::string& file : sstFiles) {
|
||||
res += (file + " ");
|
||||
}
|
||||
res += "\nFetched files:\n";
|
||||
for (const auto& file : fetchedFiles) {
|
||||
res += file.toString();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, checkpointDir, sstFiles, fetchedFiles);
|
||||
}
|
||||
};
|
||||
|
||||
// Fetch the checkpoint file(s) to local dir, the checkpoint is specified by initialState.
|
||||
// If cFun is provided, the fetch progress can be checkpointed, so that next time, the fetch process
|
||||
// can be continued, in case of crash.
|
||||
|
@ -201,9 +249,15 @@ ACTOR Future<CheckpointMetaData> fetchRocksDBCheckpoint(Database cx,
|
|||
std::string dir,
|
||||
std::function<Future<Void>(const CheckpointMetaData&)> cFun);
|
||||
|
||||
ACTOR Future<Void> deleteRocksCFCheckpoint(CheckpointMetaData checkpoint);
|
||||
// Returns the total logical bytes of all *fetched* checkpoints.
|
||||
int64_t getTotalFetchedBytes(const std::vector<CheckpointMetaData>& checkpoints);
|
||||
|
||||
// Clean up on-disk files associated with checkpoint.
|
||||
ACTOR Future<Void> deleteRocksCheckpoint(CheckpointMetaData checkpoint);
|
||||
|
||||
ICheckpointReader* newRocksDBCheckpointReader(const CheckpointMetaData& checkpoint, UID logID);
|
||||
|
||||
RocksDBColumnFamilyCheckpoint getRocksCF(const CheckpointMetaData& checkpoint);
|
||||
|
||||
RocksDBCheckpoint getRocksCheckpoint(const CheckpointMetaData& checkpoint);
|
||||
#endif
|
|
@ -24,12 +24,11 @@
|
|||
#include "flow/actorcompiler.h" // has to be last include
|
||||
|
||||
ICheckpointReader* newCheckpointReader(const CheckpointMetaData& checkpoint, UID logID) {
|
||||
if (checkpoint.getFormat() == RocksDBColumnFamily) {
|
||||
const CheckpointFormat format = checkpoint.getFormat();
|
||||
if (format == RocksDBColumnFamily || format == RocksDB) {
|
||||
return newRocksDBCheckpointReader(checkpoint, logID);
|
||||
} else if (checkpoint.getFormat() == RocksDB) {
|
||||
throw not_implemented();
|
||||
} else {
|
||||
ASSERT(false);
|
||||
throw not_implemented();
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
@ -37,13 +36,11 @@ ICheckpointReader* newCheckpointReader(const CheckpointMetaData& checkpoint, UID
|
|||
|
||||
ACTOR Future<Void> deleteCheckpoint(CheckpointMetaData checkpoint) {
|
||||
wait(delay(0, TaskPriority::FetchKeys));
|
||||
|
||||
if (checkpoint.getFormat() == RocksDBColumnFamily) {
|
||||
wait(deleteRocksCFCheckpoint(checkpoint));
|
||||
} else if (checkpoint.getFormat() == RocksDB) {
|
||||
throw not_implemented();
|
||||
state CheckpointFormat format = checkpoint.getFormat();
|
||||
if (format == RocksDBColumnFamily || format == RocksDB) {
|
||||
wait(deleteRocksCheckpoint(checkpoint));
|
||||
} else {
|
||||
ASSERT(false);
|
||||
throw not_implemented();
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
@ -53,15 +50,29 @@ ACTOR Future<CheckpointMetaData> fetchCheckpoint(Database cx,
|
|||
CheckpointMetaData initialState,
|
||||
std::string dir,
|
||||
std::function<Future<Void>(const CheckpointMetaData&)> cFun) {
|
||||
TraceEvent("FetchCheckpointBegin", initialState.checkpointID).detail("CheckpointMetaData", initialState.toString());
|
||||
state CheckpointMetaData result;
|
||||
if (initialState.getFormat() == RocksDBColumnFamily) {
|
||||
const CheckpointFormat format = initialState.getFormat();
|
||||
if (format == RocksDBColumnFamily || format == RocksDB) {
|
||||
CheckpointMetaData _result = wait(fetchRocksDBCheckpoint(cx, initialState, dir, cFun));
|
||||
result = _result;
|
||||
} else if (initialState.getFormat() == RocksDB) {
|
||||
throw not_implemented();
|
||||
} else {
|
||||
ASSERT(false);
|
||||
throw not_implemented();
|
||||
}
|
||||
|
||||
TraceEvent("FetchCheckpointEnd", initialState.checkpointID).detail("CheckpointMetaData", result.toString());
|
||||
return result;
|
||||
}
|
||||
|
||||
// Launches one fetchCheckpoint() per entry of initialStates (all sharing cx, dir and cFun),
// waits for every one of them via getAll(), and returns the collected results.
ACTOR Future<std::vector<CheckpointMetaData>> fetchCheckpoints(
    Database cx,
    std::vector<CheckpointMetaData> initialStates,
    std::string dir,
    std::function<Future<Void>(const CheckpointMetaData&)> cFun) {
	std::vector<Future<CheckpointMetaData>> pending;
	for (int idx = 0; idx < initialStates.size(); ++idx) {
		pending.push_back(fetchCheckpoint(cx, initialStates[idx], dir, cFun));
	}
	std::vector<CheckpointMetaData> fetched = wait(getAll(pending));
	return fetched;
}
|
|
@ -63,4 +63,10 @@ ACTOR Future<CheckpointMetaData> fetchCheckpoint(Database cx,
|
|||
CheckpointMetaData initialState,
|
||||
std::string dir,
|
||||
std::function<Future<Void>(const CheckpointMetaData&)> cFun = nullptr);
|
||||
|
||||
// Batch variant of fetchCheckpoint(): takes a list of initial states instead of a single one.
// cFun is optional and defaults to nullptr.
ACTOR Future<std::vector<CheckpointMetaData>> fetchCheckpoints(
|
||||
Database cx,
|
||||
std::vector<CheckpointMetaData> initialStates,
|
||||
std::string dir,
|
||||
std::function<Future<Void>(const CheckpointMetaData&)> cFun = nullptr);
|
||||
#endif
|
|
@ -62,6 +62,7 @@
|
|||
#include "fdbserver/MoveKeys.actor.h"
|
||||
#include "fdbserver/MutationTracking.h"
|
||||
#include "fdbserver/RecoveryState.h"
|
||||
#include "fdbserver/RocksDBCheckpointUtils.actor.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/ServerCheckpoint.actor.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
|
@ -846,6 +847,8 @@ public:
|
|||
AsyncVar<bool> fetchKeysBudgetUsed;
|
||||
std::vector<Promise<FetchInjectionInfo*>> readyFetchKeys;
|
||||
|
||||
FlowLock serveFetchCheckpointParallelismLock;
|
||||
|
||||
int64_t instanceID;
|
||||
|
||||
Promise<Void> otherError;
|
||||
|
@ -991,6 +994,12 @@ public:
|
|||
});
|
||||
specialCounter(
|
||||
cc, "FetchChangeFeedWaiting", [self]() { return self->fetchChangeFeedParallelismLock.waiters(); });
|
||||
specialCounter(cc, "ServeFetchCheckpointActive", [self]() {
|
||||
return self->serveFetchCheckpointParallelismLock.activePermits();
|
||||
});
|
||||
specialCounter(cc, "ServeFetchCheckpointWaiting", [self]() {
|
||||
return self->serveFetchCheckpointParallelismLock.waiters();
|
||||
});
|
||||
specialCounter(cc, "QueryQueueMax", [self]() { return self->getAndResetMaxQueryQueueSize(); });
|
||||
specialCounter(cc, "BytesStored", [self]() { return self->metrics.byteSample.getEstimate(allKeys); });
|
||||
specialCounter(cc, "ActiveWatches", [self]() { return self->numWatches; });
|
||||
|
@ -1046,6 +1055,7 @@ public:
|
|||
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM),
|
||||
fetchChangeFeedParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM),
|
||||
fetchKeysBytesBudget(SERVER_KNOBS->STORAGE_FETCH_BYTES), fetchKeysBudgetUsed(false),
|
||||
serveFetchCheckpointParallelismLock(SERVER_KNOBS->SERVE_FETCH_CHECKPOINT_PARALLELISM),
|
||||
instanceID(deterministicRandom()->randomUniqueID().first()), shuttingDown(false), behind(false),
|
||||
versionBehind(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), lastBytesInputEBrake(0),
|
||||
lastDurableVersionEBrake(0), maxQueryQueue(0), transactionTagCounter(ssi.id()), counters(this),
|
||||
|
@ -1398,7 +1408,7 @@ void updateProcessStats(StorageServer* self) {
|
|||
#endif
|
||||
|
||||
ACTOR Future<Version> waitForVersionActor(StorageServer* data, Version version, SpanContext spanContext) {
|
||||
state Span span("SS.WaitForVersion"_loc, spanContext);
|
||||
state Span span("SS:WaitForVersion"_loc, spanContext);
|
||||
choose {
|
||||
when(wait(data->version.whenAtLeast(version))) {
|
||||
// FIXME: A bunch of these can block with or without the following delay 0.
|
||||
|
@ -1863,6 +1873,8 @@ ACTOR Future<Void> getCheckpointQ(StorageServer* self, GetCheckpointRequest req)
|
|||
|
||||
// Delete the checkpoint from disk, as well as all related persisted meta data.
|
||||
ACTOR Future<Void> deleteCheckpointQ(StorageServer* self, Version version, CheckpointMetaData checkpoint) {
|
||||
wait(delay(0, TaskPriority::Low));
|
||||
|
||||
wait(self->durableVersion.whenAtLeast(version));
|
||||
|
||||
TraceEvent("DeleteCheckpointBegin", self->thisServerID).detail("Checkpoint", checkpoint.toString());
|
||||
|
@ -1937,6 +1949,74 @@ ACTOR Future<Void> fetchCheckpointQ(StorageServer* self, FetchCheckpointRequest
|
|||
return Void();
|
||||
}
|
||||
|
||||
// Serves FetchCheckpointKeyValuesRequest, reads local checkpoint and sends it to the client over wire.
|
||||
ACTOR Future<Void> fetchCheckpointKeyValuesQ(StorageServer* self, FetchCheckpointKeyValuesRequest req) {
|
||||
wait(self->serveFetchCheckpointParallelismLock.take(TaskPriority::DefaultYield));
|
||||
state FlowLock::Releaser holder(self->serveFetchCheckpointParallelismLock);
|
||||
|
||||
TraceEvent("ServeFetchCheckpointKeyValuesBegin", self->thisServerID)
|
||||
.detail("CheckpointID", req.checkpointID)
|
||||
.detail("Range", req.range);
|
||||
|
||||
req.reply.setByteLimit(SERVER_KNOBS->CHECKPOINT_TRANSFER_BLOCK_BYTES);
|
||||
|
||||
// Returns error if the checkpoint cannot be found.
|
||||
const auto it = self->checkpoints.find(req.checkpointID);
|
||||
if (it == self->checkpoints.end()) {
|
||||
req.reply.sendError(checkpoint_not_found());
|
||||
TraceEvent("ServeFetchCheckpointNotFound", self->thisServerID).detail("CheckpointID", req.checkpointID);
|
||||
return Void();
|
||||
}
|
||||
|
||||
try {
|
||||
state ICheckpointReader* reader = newCheckpointReader(it->second, self->thisServerID);
|
||||
wait(reader->init(BinaryWriter::toValue(req.range, IncludeVersion())));
|
||||
|
||||
loop {
|
||||
state RangeResult res =
|
||||
wait(reader->nextKeyValues(CLIENT_KNOBS->REPLY_BYTE_LIMIT, CLIENT_KNOBS->REPLY_BYTE_LIMIT));
|
||||
if (!res.empty()) {
|
||||
TraceEvent(SevDebug, "FetchCheckpontKeyValuesReadRange", self->thisServerID)
|
||||
.detail("CheckpointID", req.checkpointID)
|
||||
.detail("FirstReturnedKey", res.front().key)
|
||||
.detail("LastReturnedKey", res.back().key)
|
||||
.detail("Size", res.size());
|
||||
} else {
|
||||
TraceEvent(SevInfo, "FetchCheckpontKeyValuesEmptyRange", self->thisServerID)
|
||||
.detail("CheckpointID", req.checkpointID);
|
||||
}
|
||||
|
||||
wait(req.reply.onReady());
|
||||
FetchCheckpointKeyValuesStreamReply reply;
|
||||
reply.arena.dependsOn(res.arena());
|
||||
for (int i = 0; i < res.size(); ++i) {
|
||||
reply.data.push_back(reply.arena, res[i]);
|
||||
}
|
||||
|
||||
req.reply.send(reply);
|
||||
}
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_end_of_stream) {
|
||||
req.reply.sendError(end_of_stream());
|
||||
TraceEvent("ServeFetchCheckpointKeyValuesEnd", self->thisServerID)
|
||||
.detail("CheckpointID", req.checkpointID)
|
||||
.detail("Range", req.range);
|
||||
} else {
|
||||
TraceEvent(SevWarnAlways, "ServerFetchCheckpointKeyValuesFailure")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("CheckpointID", req.checkpointID)
|
||||
.detail("Range", req.range);
|
||||
if (!canReplyWith(e)) {
|
||||
throw e;
|
||||
}
|
||||
req.reply.sendError(e);
|
||||
}
|
||||
}
|
||||
|
||||
wait(reader->close());
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> overlappingChangeFeedsQ(StorageServer* data, OverlappingChangeFeedsRequest req) {
|
||||
wait(delay(0));
|
||||
wait(data->version.whenAtLeast(req.minVersion));
|
||||
|
@ -3769,8 +3849,8 @@ ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
|
|||
// Use the mappedKey as the prefix of the range query.
|
||||
GetRangeReqAndResultRef getRange =
|
||||
wait(quickGetKeyValues(data, mappedKey, input.version, &(result.arena), pOriginalReq));
|
||||
if (!getRange.result.empty() && matchIndex == MATCH_INDEX_MATCHED_ONLY ||
|
||||
getRange.result.empty() && matchIndex == MATCH_INDEX_UNMATCHED_ONLY) {
|
||||
if ((!getRange.result.empty() && matchIndex == MATCH_INDEX_MATCHED_ONLY) ||
|
||||
(getRange.result.empty() && matchIndex == MATCH_INDEX_UNMATCHED_ONLY)) {
|
||||
kvm.key = it->key;
|
||||
kvm.value = it->value;
|
||||
}
|
||||
|
@ -5331,13 +5411,12 @@ ACTOR Future<Version> fetchChangeFeed(StorageServer* data,
|
|||
}
|
||||
}
|
||||
|
||||
// TODO REMOVE
|
||||
fmt::print("DBG: SS {} Feed {} possibly destroyed {}, {} metadata create, {} desired committed\n",
|
||||
/*fmt::print("DBG: SS {} Feed {} possibly destroyed {}, {} metadata create, {} desired committed\n",
|
||||
data->thisServerID.toString().substr(0, 4),
|
||||
changeFeedInfo->id.printable(),
|
||||
changeFeedInfo->possiblyDestroyed,
|
||||
changeFeedInfo->metadataCreateVersion,
|
||||
data->desiredOldestVersion.get());
|
||||
data->desiredOldestVersion.get());*/
|
||||
|
||||
// There are two reasons for change_feed_not_registered:
|
||||
// 1. The feed was just created, but the ss mutation stream is ahead of the GRV that fetchChangeFeedApplier
|
||||
|
@ -5354,9 +5433,8 @@ ACTOR Future<Version> fetchChangeFeed(StorageServer* data,
|
|||
.detail("Range", changeFeedInfo->range.toString())
|
||||
.detail("Version", cleanupVersion);
|
||||
|
||||
if (g_network->isSimulated()) {
|
||||
ASSERT(allDestroyedChangeFeeds.count(changeFeedInfo->id));
|
||||
}
|
||||
// FIXME: do simulated validation that feed was destroyed, but needs to be added when client starts
|
||||
// destroying a change feed instead of when server receives private mutation for it
|
||||
|
||||
Key beginClearKey = changeFeedInfo->id.withPrefix(persistChangeFeedKeys.begin);
|
||||
|
||||
|
@ -5577,11 +5655,9 @@ ACTOR Future<std::vector<Key>> fetchChangeFeedMetadata(StorageServer* data,
|
|||
continue;
|
||||
}
|
||||
|
||||
// TODO REMOVE print
|
||||
fmt::print("DBG: SS {} fetching feed {} was refreshed but not present!! assuming destroyed\n",
|
||||
/*fmt::print("DBG: SS {} fetching feed {} was refreshed but not present!! assuming destroyed\n",
|
||||
data->thisServerID.toString().substr(0, 4),
|
||||
feedId.printable());
|
||||
|
||||
feedId.printable());*/
|
||||
Version cleanupVersion = data->data().getLatestVersion();
|
||||
|
||||
TraceEvent(SevDebug, "DestroyingChangeFeedFromFetchMetadata", data->thisServerID)
|
||||
|
@ -7399,12 +7475,14 @@ ACTOR Future<Void> createCheckpoint(StorageServer* data, CheckpointMetaData meta
|
|||
data->storage.clearRange(singleKeyRange(pendingCheckpointKey));
|
||||
data->storage.writeKeyValue(KeyValueRef(persistCheckpointKey, checkpointValue(checkpointResult)));
|
||||
wait(data->storage.commit());
|
||||
TraceEvent("StorageCreateCheckpointPersisted", data->thisServerID)
|
||||
.detail("Checkpoint", checkpointResult.toString());
|
||||
} catch (Error& e) {
|
||||
// If the checkpoint meta data is not persisted successfully, remove the checkpoint.
|
||||
TraceEvent("StorageCreateCheckpointPersistFailure", data->thisServerID)
|
||||
TraceEvent(SevWarn, "StorageCreateCheckpointPersistFailure", data->thisServerID)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Checkpoint", checkpointResult.toString());
|
||||
data->checkpoints.erase(checkpointResult.checkpointID);
|
||||
data->checkpoints[checkpointResult.checkpointID].setState(CheckpointMetaData::Deleting);
|
||||
data->actors.add(deleteCheckpointQ(data, metaData.version, checkpointResult));
|
||||
}
|
||||
|
||||
|
@ -7453,6 +7531,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
|
|||
if (cVer <= desiredVersion) {
|
||||
TraceEvent("CheckpointVersionSatisfied", data->thisServerID)
|
||||
.detail("DesiredVersion", desiredVersion)
|
||||
.detail("DurableVersion", data->durableVersion.get())
|
||||
.detail("CheckPointVersion", cVer);
|
||||
desiredVersion = cVer;
|
||||
requireCheckpoint = true;
|
||||
|
@ -7553,13 +7632,29 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
|
|||
debug_advanceMinCommittedVersion(data->thisServerID, newOldestVersion);
|
||||
|
||||
if (requireCheckpoint) {
|
||||
ASSERT(newOldestVersion == data->pendingCheckpoints.begin()->first);
|
||||
std::vector<Future<Void>> createCheckpoints;
|
||||
for (int idx = 0; idx < data->pendingCheckpoints.begin()->second.size(); ++idx) {
|
||||
createCheckpoints.push_back(createCheckpoint(data, data->pendingCheckpoints.begin()->second[idx]));
|
||||
// `pendingCheckpoints` is a queue of checkpoint requests ordered by their versions, and
|
||||
// `newOldestVersion` is chosen such that it is no larger than the smallest pending checkpoint
|
||||
// version. When the exact desired checkpoint version is committed, updateStorage() is blocked
|
||||
// and a checkpoint will be created at that version from the underlying storage engine.
|
||||
// Note a pending checkpoint is only dequeued after the corresponding checkpoint is created
|
||||
// successfully.
|
||||
TraceEvent(SevDebug, "CheckpointVersionDurable", data->thisServerID)
|
||||
.detail("NewDurableVersion", newOldestVersion)
|
||||
.detail("DesiredVersion", desiredVersion)
|
||||
.detail("SmallestCheckPointVersion", data->pendingCheckpoints.begin()->first);
|
||||
// newOldestVersion could be smaller than the desired version due to byte limit.
|
||||
ASSERT(newOldestVersion <= data->pendingCheckpoints.begin()->first);
|
||||
if (newOldestVersion == data->pendingCheckpoints.begin()->first) {
|
||||
std::vector<Future<Void>> createCheckpoints;
|
||||
// TODO: Combine these checkpoints if necessary.
|
||||
for (int idx = 0; idx < data->pendingCheckpoints.begin()->second.size(); ++idx) {
|
||||
createCheckpoints.push_back(createCheckpoint(data, data->pendingCheckpoints.begin()->second[idx]));
|
||||
}
|
||||
wait(waitForAll(createCheckpoints));
|
||||
// Erase the pending checkpoint after the checkpoint has been created successfully.
|
||||
ASSERT(newOldestVersion == data->pendingCheckpoints.begin()->first);
|
||||
data->pendingCheckpoints.erase(data->pendingCheckpoints.begin());
|
||||
}
|
||||
wait(waitForAll(createCheckpoints));
|
||||
data->pendingCheckpoints.erase(data->pendingCheckpoints.begin());
|
||||
requireCheckpoint = false;
|
||||
}
|
||||
|
||||
|
@ -8926,6 +9021,9 @@ ACTOR Future<Void> storageServerCore(StorageServer* self, StorageServerInterface
|
|||
when(FetchCheckpointRequest req = waitNext(ssi.fetchCheckpoint.getFuture())) {
|
||||
self->actors.add(fetchCheckpointQ(self, req));
|
||||
}
|
||||
when(FetchCheckpointKeyValuesRequest req = waitNext(ssi.fetchCheckpointKeyValues.getFuture())) {
|
||||
self->actors.add(fetchCheckpointKeyValuesQ(self, req));
|
||||
}
|
||||
when(wait(updateProcessStatsTimer)) {
|
||||
updateProcessStats(self);
|
||||
updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL);
|
||||
|
@ -9370,6 +9468,10 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
|||
|
||||
throw internal_error();
|
||||
} catch (Error& e) {
|
||||
if (self.byteSampleRecovery.isValid()) {
|
||||
self.byteSampleRecovery.cancel();
|
||||
}
|
||||
|
||||
if (recovered.canBeSet())
|
||||
recovered.send(Void());
|
||||
|
||||
|
|
|
@ -149,33 +149,26 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
const MappedKeyValueRef* it,
|
||||
GetMappedRangeWorkload* self,
|
||||
int matchIndex,
|
||||
bool isBoundary) {
|
||||
bool isBoundary,
|
||||
bool allMissing) {
|
||||
// std::cout << "validateRecord expectedId " << expectedId << " it->key " << printable(it->key) << "
|
||||
// indexEntryKey(expectedId) " << printable(indexEntryKey(expectedId)) << std::endl;
|
||||
if (matchIndex == MATCH_INDEX_ALL || isBoundary) {
|
||||
ASSERT(it->key == indexEntryKey(expectedId));
|
||||
} else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) {
|
||||
// now we cannot generate a workload that only has partial results matched
|
||||
// thus expecting everything matched
|
||||
// TODO: create tests to generate workloads with partial secondary results present
|
||||
ASSERT(it->key == indexEntryKey(expectedId));
|
||||
ASSERT(it->key == (allMissing ? EMPTY : indexEntryKey(expectedId)));
|
||||
} else if (matchIndex == MATCH_INDEX_UNMATCHED_ONLY) {
|
||||
// now we cannot generate a workload that only has partial results matched
|
||||
// thus expecting everything NOT matched(except for the boundary asserted above)
|
||||
// TODO: create tests to generate workloads with partial secondary results present
|
||||
ASSERT(it->key == EMPTY);
|
||||
ASSERT(it->key == (allMissing ? indexEntryKey(expectedId) : EMPTY));
|
||||
} else {
|
||||
ASSERT(it->key == EMPTY);
|
||||
}
|
||||
// TODO: create tests to generate workloads with partial secondary results present
|
||||
ASSERT(it->boundaryAndExist == isBoundary);
|
||||
|
||||
ASSERT(it->value == EMPTY);
|
||||
|
||||
if (self->SPLIT_RECORDS) {
|
||||
ASSERT(std::holds_alternative<GetRangeReqAndResultRef>(it->reqAndResult));
|
||||
auto& getRange = std::get<GetRangeReqAndResultRef>(it->reqAndResult);
|
||||
auto& rangeResult = getRange.result;
|
||||
ASSERT(it->boundaryAndExist == (isBoundary && !rangeResult.empty()));
|
||||
// std::cout << "rangeResult.size()=" << rangeResult.size() << std::endl;
|
||||
// In the future, we may be able to do the continuation more efficiently by combining partial results
|
||||
// together and then validate.
|
||||
|
@ -183,17 +176,20 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
// Retry if the underlying request is not fully completed.
|
||||
return true;
|
||||
}
|
||||
ASSERT(rangeResult.size() == SPLIT_SIZE);
|
||||
for (int split = 0; split < SPLIT_SIZE; split++) {
|
||||
auto& kv = rangeResult[split];
|
||||
// std::cout << "kv.key=" << printable(kv.key)
|
||||
// << ", recordKey(id, split)=" << printable(recordKey(id, split)) <<
|
||||
// std::endl; std::cout << "kv.value=" << printable(kv.value)
|
||||
// << ", recordValue(id, split)=" << printable(recordValue(id, split)) <<
|
||||
// std::endl;
|
||||
ASSERT(kv.key == recordKey(expectedId, split));
|
||||
ASSERT(kv.value == recordValue(expectedId, split));
|
||||
if (!allMissing) {
|
||||
ASSERT(rangeResult.size() == SPLIT_SIZE);
|
||||
for (int split = 0; split < SPLIT_SIZE; split++) {
|
||||
auto& kv = rangeResult[split];
|
||||
// std::cout << "kv.key=" << printable(kv.key)
|
||||
// << ", recordKey(id, split)=" << printable(recordKey(id, split)) <<
|
||||
// std::endl; std::cout << "kv.value=" << printable(kv.value)
|
||||
// << ", recordValue(id, split)=" << printable(recordValue(id,split)) <<
|
||||
// std::endl;
|
||||
ASSERT(kv.key == recordKey(expectedId, split));
|
||||
ASSERT(kv.value == recordValue(expectedId, split));
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
ASSERT(std::holds_alternative<GetValueReqAndResultRef>(it->reqAndResult));
|
||||
auto& getValue = std::get<GetValueReqAndResultRef>(it->reqAndResult);
|
||||
|
@ -211,7 +207,8 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
int limit,
|
||||
int expectedBeginId,
|
||||
GetMappedRangeWorkload* self,
|
||||
int matchIndex) {
|
||||
int matchIndex,
|
||||
bool allMissing) {
|
||||
|
||||
std::cout << "start scanMappedRangeWithLimits beginSelector:" << beginSelector.toString()
|
||||
<< " endSelector:" << endSelector.toString() << " expectedBeginId:" << expectedBeginId
|
||||
|
@ -238,7 +235,8 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
int cnt = 0;
|
||||
const MappedKeyValueRef* it = result.begin();
|
||||
for (; cnt < result.size(); cnt++, it++) {
|
||||
if (validateRecord(expectedId, it, self, matchIndex, cnt == 0 || cnt == result.size() - 1)) {
|
||||
if (validateRecord(
|
||||
expectedId, it, self, matchIndex, cnt == 0 || cnt == result.size() - 1, allMissing)) {
|
||||
needRetry = true;
|
||||
break;
|
||||
}
|
||||
|
@ -270,7 +268,8 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
int endId,
|
||||
Key mapper,
|
||||
GetMappedRangeWorkload* self,
|
||||
int matchIndex) {
|
||||
int matchIndex,
|
||||
bool allMissing = false) {
|
||||
Key beginTuple = Tuple().append(prefix).append(INDEX).append(indexKey(beginId)).getDataAsStandalone();
|
||||
state KeySelector beginSelector = KeySelector(firstGreaterOrEqual(beginTuple));
|
||||
Key endTuple = Tuple().append(prefix).append(INDEX).append(indexKey(endId)).getDataAsStandalone();
|
||||
|
@ -279,7 +278,7 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
state int expectedBeginId = beginId;
|
||||
while (true) {
|
||||
MappedRangeResult result = wait(self->scanMappedRangeWithLimits(
|
||||
cx, beginSelector, endSelector, mapper, limit, expectedBeginId, self, matchIndex));
|
||||
cx, beginSelector, endSelector, mapper, limit, expectedBeginId, self, matchIndex, allMissing));
|
||||
expectedBeginId += result.size();
|
||||
if (result.more) {
|
||||
if (result.empty()) {
|
||||
|
@ -322,7 +321,7 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
int endId,
|
||||
Reference<TransactionWrapper>& tr,
|
||||
GetMappedRangeWorkload* self) {
|
||||
Key mapper = getMapper(self);
|
||||
Key mapper = getMapper(self, false);
|
||||
Key beginTuple = Tuple().append(prefix).append(INDEX).append(indexKey(beginId)).getDataAsStandalone();
|
||||
KeySelector beginSelector = KeySelector(firstGreaterOrEqual(beginTuple));
|
||||
Key endTuple = Tuple().append(prefix).append(INDEX).append(indexKey(endId)).getDataAsStandalone();
|
||||
|
@ -427,7 +426,7 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
std::cout << "Test configuration: transactionType:" << self->transactionType << " snapshot:" << self->snapshot
|
||||
<< "bad_mapper:" << self->BAD_MAPPER << std::endl;
|
||||
|
||||
Key mapper = getMapper(self);
|
||||
Key mapper = getMapper(self, false);
|
||||
// The scanned range cannot be too large to hit get_mapped_key_values_has_more. We have a unit validating the
|
||||
// error is thrown when the range is large.
|
||||
const double r = deterministicRandom()->random01();
|
||||
|
@ -440,15 +439,19 @@ struct GetMappedRangeWorkload : ApiWorkload {
|
|||
matchIndex = MATCH_INDEX_UNMATCHED_ONLY;
|
||||
}
|
||||
wait(self->scanMappedRange(cx, 10, 490, mapper, self, matchIndex));
|
||||
|
||||
Key mapper = getMapper(self, true);
|
||||
wait(self->scanMappedRange(cx, 10, 490, mapper, self, MATCH_INDEX_UNMATCHED_ONLY, true));
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
static Key getMapper(GetMappedRangeWorkload* self) {
|
||||
static Key getMapper(GetMappedRangeWorkload* self, bool mapperForAllMissing) {
|
||||
Tuple mapperTuple;
|
||||
if (self->BAD_MAPPER) {
|
||||
mapperTuple << prefix << RECORD << "{K[xxx]}"_sr;
|
||||
} else {
|
||||
mapperTuple << prefix << RECORD << "{K[3]}"_sr;
|
||||
mapperTuple << prefix << RECORD << (mapperForAllMissing ? "{K[2]}"_sr : "{K[3]}"_sr);
|
||||
if (self->SPLIT_RECORDS) {
|
||||
mapperTuple << "{...}"_sr;
|
||||
}
|
||||
|
|
|
@ -71,18 +71,19 @@ struct SSCheckpointWorkload : TestWorkload {
|
|||
state Key key = "TestKey"_sr;
|
||||
state Key endKey = "TestKey0"_sr;
|
||||
state Value oldValue = "TestValue"_sr;
|
||||
state KeyRange testRange = KeyRangeRef(key, endKey);
|
||||
|
||||
int ignore = wait(setDDMode(cx, 0));
|
||||
state Version version = wait(self->writeAndVerify(self, cx, key, oldValue));
|
||||
|
||||
// Create checkpoint.
|
||||
state Transaction tr(cx);
|
||||
state CheckpointFormat format = RocksDBColumnFamily;
|
||||
state CheckpointFormat format = deterministicRandom()->coinflip() ? RocksDBColumnFamily : RocksDB;
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
wait(createCheckpoint(&tr, KeyRangeRef(key, endKey), format));
|
||||
wait(createCheckpoint(&tr, testRange, format));
|
||||
wait(tr.commit());
|
||||
version = tr.getCommittedVersion();
|
||||
break;
|
||||
|
@ -91,20 +92,18 @@ struct SSCheckpointWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
TraceEvent("TestCheckpointCreated")
|
||||
.detail("Range", KeyRangeRef(key, endKey).toString())
|
||||
.detail("Version", version);
|
||||
TraceEvent("TestCheckpointCreated").detail("Range", testRange).detail("Version", version);
|
||||
|
||||
// Fetch checkpoint meta data.
|
||||
loop {
|
||||
try {
|
||||
state std::vector<CheckpointMetaData> records =
|
||||
wait(getCheckpointMetaData(cx, KeyRangeRef(key, endKey), version, format));
|
||||
wait(getCheckpointMetaData(cx, testRange, version, format));
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("TestFetchCheckpointMetadataError")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Range", KeyRangeRef(key, endKey).toString())
|
||||
.detail("Range", testRange)
|
||||
.detail("Version", version);
|
||||
|
||||
// The checkpoint was just created, we don't expect this error.
|
||||
|
@ -113,9 +112,9 @@ struct SSCheckpointWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
TraceEvent("TestCheckpointFetched")
|
||||
.detail("Range", KeyRangeRef(key, endKey).toString())
|
||||
.detail("Range", testRange)
|
||||
.detail("Version", version)
|
||||
.detail("Shards", records.size());
|
||||
.detail("Checkpoints", describe(records));
|
||||
|
||||
state std::string pwd = platform::getWorkingDirectory();
|
||||
state std::string folder = pwd + "/checkpoints";
|
||||
|
@ -123,13 +122,15 @@ struct SSCheckpointWorkload : TestWorkload {
|
|||
ASSERT(platform::createDirectory(folder));
|
||||
|
||||
// Fetch checkpoint.
|
||||
state std::vector<CheckpointMetaData> fetchedCheckpoints;
|
||||
state int i = 0;
|
||||
for (; i < records.size(); ++i) {
|
||||
loop {
|
||||
TraceEvent("TestFetchingCheckpoint").detail("Checkpoint", records[i].toString());
|
||||
try {
|
||||
state CheckpointMetaData record = wait(fetchCheckpoint(cx, records[0], folder));
|
||||
TraceEvent("TestCheckpointFetched").detail("Checkpoint", records[i].toString());
|
||||
fetchedCheckpoints.push_back(record);
|
||||
TraceEvent("TestCheckpointFetched").detail("Checkpoint", record.toString());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("TestFetchCheckpointError")
|
||||
|
@ -146,8 +147,9 @@ struct SSCheckpointWorkload : TestWorkload {
|
|||
// Restore KVS.
|
||||
state IKeyValueStore* kvStore = keyValueStoreRocksDB(
|
||||
rocksDBTestDir, deterministicRandom()->randomUniqueID(), KeyValueStoreType::SSD_ROCKSDB_V1);
|
||||
wait(kvStore->init());
|
||||
try {
|
||||
wait(kvStore->restore(records));
|
||||
wait(kvStore->restore(fetchedCheckpoints));
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "TestRestoreCheckpointError")
|
||||
.errorUnsuppressed(e)
|
||||
|
@ -167,10 +169,10 @@ struct SSCheckpointWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < res.size(); ++i) {
|
||||
Optional<Value> value = wait(kvStore->readValue(res[i].key));
|
||||
ASSERT(value.present());
|
||||
ASSERT(value.get() == res[i].value);
|
||||
RangeResult kvRange = wait(kvStore->readRange(testRange));
|
||||
ASSERT(res.size() == kvRange.size());
|
||||
for (int i = 0; i < res.size(); ++i) {
|
||||
ASSERT(res[i] == kvRange[i]);
|
||||
}
|
||||
|
||||
int ignore = wait(setDDMode(cx, 1));
|
||||
|
|
|
@ -179,9 +179,6 @@ mkcert::CertChainRef ChainSpec::makeChain(Arena& arena) {
|
|||
ofsCert.write(reinterpret_cast<char const*>(cert.begin()), cert.size());
|
||||
auto key = chain[0].privateKeyPem;
|
||||
ofsKey.write(reinterpret_cast<char const*>(key.begin()), key.size());
|
||||
ofsCert.close();
|
||||
ofsKey.close();
|
||||
ofsCa.close();
|
||||
return chain;
|
||||
}
|
||||
|
||||
|
|
|
@ -167,14 +167,12 @@ struct UDPTracer : public ITracer {
|
|||
// Serializes span fields as an array into the supplied TraceRequest
|
||||
// buffer.
|
||||
void serialize_span(const Span& span, TraceRequest& request) {
|
||||
uint16_t size = 14;
|
||||
uint16_t size = 12;
|
||||
request.write_byte(size | 0b10010000); // write as array
|
||||
serialize_value(span.context.traceID.first(), request, 0xcf); // trace id
|
||||
serialize_value(span.context.traceID.second(), request, 0xcf); // trace id
|
||||
serialize_value(span.context.spanID, request, 0xcf); // spanid
|
||||
// parent value
|
||||
serialize_value(span.parentContext.traceID.first(), request, 0xcf); // trace id
|
||||
serialize_value(span.parentContext.traceID.second(), request, 0xcf); // trace id
|
||||
// parent span id
|
||||
serialize_value(span.parentContext.spanID, request, 0xcf); // spanId
|
||||
// Payload
|
||||
serialize_string(span.location.name.toString(), request);
|
||||
|
@ -477,10 +475,6 @@ TEST_CASE("/flow/Tracing/CreateOTELSpan") {
|
|||
Span notSampled("foo"_loc);
|
||||
ASSERT(!notSampled.context.isSampled());
|
||||
|
||||
// Force Sampling
|
||||
// Span sampled("foo"_loc, []() { return 1.0; });
|
||||
// ASSERT(sampled.context.isSampled());
|
||||
|
||||
// Ensure child traceID matches parent, when parent is sampled.
|
||||
Span childTraceIDMatchesParent("foo"_loc, SpanContext(UID(100, 101), 200, TraceFlags::sampled));
|
||||
ASSERT(childTraceIDMatchesParent.context.traceID.first() ==
|
||||
|
@ -493,11 +487,6 @@ TEST_CASE("/flow/Tracing/CreateOTELSpan") {
|
|||
Span parentNotSampled("foo"_loc, SpanContext(UID(1, 1), 1, TraceFlags::unsampled));
|
||||
ASSERT(!parentNotSampled.context.isSampled());
|
||||
|
||||
// When the parent isn't sampled AND it has zero values for traceID and spanID this means
|
||||
// we should defer to the child as the new root of the trace as there was no actual parent.
|
||||
// If the child was sampled we should send the child trace with a null parent.
|
||||
// Span noParent("foo"_loc, SpanContext(UID(0, 0), 0, TraceFlags::unsampled));
|
||||
// ASSERT(noParent.context.isSampled());
|
||||
return Void();
|
||||
};
|
||||
|
||||
|
@ -669,7 +658,7 @@ TEST_CASE("/flow/Tracing/FastUDPMessagePackEncoding") {
|
|||
auto tracer = FastUDPTracer();
|
||||
tracer.serialize_span(span1, request);
|
||||
auto data = request.buffer.get();
|
||||
ASSERT(data[0] == 0b10011110); // Default array size.
|
||||
ASSERT(data[0] == 0b10011100); // Default array size.
|
||||
request.reset();
|
||||
|
||||
// Test - constructor OTELSpan(const Location& location, const SpanContext parent, const SpanContext& link)
|
||||
|
@ -679,7 +668,7 @@ TEST_CASE("/flow/Tracing/FastUDPMessagePackEncoding") {
|
|||
{ SpanContext(UID(200, 201), 2, TraceFlags::sampled) });
|
||||
tracer.serialize_span(span2, request);
|
||||
data = request.buffer.get();
|
||||
ASSERT(data[0] == 0b10011110); // 14 element array.
|
||||
ASSERT(data[0] == 0b10011100); // 12 element array.
|
||||
// Verify the Parent Trace ID overwrites this spans Trace ID
|
||||
ASSERT(data[1] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[2]) == 100);
|
||||
|
@ -687,37 +676,33 @@ TEST_CASE("/flow/Tracing/FastUDPMessagePackEncoding") {
|
|||
ASSERT(swapUint64BE(&data[11]) == 101);
|
||||
ASSERT(data[19] == 0xcf);
|
||||
// We don't care about the next 8 bytes, they are the ID for the span itself and will be random.
|
||||
// Parent TraceID and Parent SpanID.
|
||||
// Parent SpanID.
|
||||
ASSERT(data[28] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[29]) == 100);
|
||||
ASSERT(data[37] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[38]) == 101);
|
||||
ASSERT(data[46] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[47]) == 1);
|
||||
ASSERT(swapUint64BE(&data[29]) == 1);
|
||||
// Read and verify span name
|
||||
ASSERT(readMPString(&data[55]) == "encoded_span");
|
||||
ASSERT(readMPString(&data[37]) == "encoded_span");
|
||||
// Verify begin/end is encoded, we don't care about the values
|
||||
ASSERT(data[68] == 0xcb);
|
||||
ASSERT(data[77] == 0xcb);
|
||||
ASSERT(data[50] == 0xcb);
|
||||
ASSERT(data[59] == 0xcb);
|
||||
// SpanKind
|
||||
ASSERT(data[86] == 0xcc);
|
||||
ASSERT(data[87] == static_cast<uint8_t>(SpanKind::SERVER));
|
||||
ASSERT(data[68] == 0xcc);
|
||||
ASSERT(data[69] == static_cast<uint8_t>(SpanKind::SERVER));
|
||||
// Status
|
||||
ASSERT(data[88] == 0xcc);
|
||||
ASSERT(data[89] == static_cast<uint8_t>(SpanStatus::OK));
|
||||
ASSERT(data[70] == 0xcc);
|
||||
ASSERT(data[71] == static_cast<uint8_t>(SpanStatus::OK));
|
||||
// Linked SpanContext
|
||||
ASSERT(data[90] == 0b10010001);
|
||||
ASSERT(data[72] == 0b10010001);
|
||||
ASSERT(data[73] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[74]) == 200);
|
||||
ASSERT(data[82] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[83]) == 201);
|
||||
ASSERT(data[91] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[92]) == 200);
|
||||
ASSERT(data[100] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[101]) == 201);
|
||||
ASSERT(data[109] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[110]) == 2);
|
||||
ASSERT(swapUint64BE(&data[92]) == 2);
|
||||
// Events
|
||||
ASSERT(data[118] == 0b10010000); // empty
|
||||
ASSERT(data[100] == 0b10010000); // empty
|
||||
// Attributes
|
||||
ASSERT(data[119] == 0b10000001); // single k/v pair
|
||||
ASSERT(data[120] == 0b10100111); // length of key string "address" == 7
|
||||
ASSERT(data[101] == 0b10000001); // single k/v pair
|
||||
ASSERT(data[102] == 0b10100111); // length of key string "address" == 7
|
||||
|
||||
request.reset();
|
||||
|
||||
|
@ -731,41 +716,41 @@ TEST_CASE("/flow/Tracing/FastUDPMessagePackEncoding") {
|
|||
.addEvent(StringRef(s3Arena, LiteralStringRef("event1")), 100.101, attrs);
|
||||
tracer.serialize_span(span3, request);
|
||||
data = request.buffer.get();
|
||||
ASSERT(data[0] == 0b10011110); // 14 element array.
|
||||
// We don't care about the next 54 bytes as there is no parent and a randomly assigned Trace and SpanID
|
||||
ASSERT(data[0] == 0b10011100); // 12 element array.
|
||||
// We don't care about the next 36 bytes as there is no parent and a randomly assigned Trace and SpanID
|
||||
// Read and verify span name
|
||||
ASSERT(readMPString(&data[55]) == "encoded_span_3");
|
||||
ASSERT(readMPString(&data[37]) == "encoded_span_3");
|
||||
// Verify begin/end is encoded, we don't care about the values
|
||||
ASSERT(data[70] == 0xcb);
|
||||
ASSERT(data[79] == 0xcb);
|
||||
ASSERT(data[52] == 0xcb);
|
||||
ASSERT(data[61] == 0xcb);
|
||||
// SpanKind
|
||||
ASSERT(data[88] == 0xcc);
|
||||
ASSERT(data[89] == static_cast<uint8_t>(SpanKind::SERVER));
|
||||
ASSERT(data[70] == 0xcc);
|
||||
ASSERT(data[71] == static_cast<uint8_t>(SpanKind::SERVER));
|
||||
// Status
|
||||
ASSERT(data[90] == 0xcc);
|
||||
ASSERT(data[91] == static_cast<uint8_t>(SpanStatus::OK));
|
||||
ASSERT(data[72] == 0xcc);
|
||||
ASSERT(data[73] == static_cast<uint8_t>(SpanStatus::OK));
|
||||
// Linked SpanContext
|
||||
ASSERT(data[92] == 0b10010001);
|
||||
ASSERT(data[74] == 0b10010001);
|
||||
ASSERT(data[75] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[76]) == 300);
|
||||
ASSERT(data[84] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[85]) == 301);
|
||||
ASSERT(data[93] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[94]) == 300);
|
||||
ASSERT(data[102] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[103]) == 301);
|
||||
ASSERT(data[111] == 0xcf);
|
||||
ASSERT(swapUint64BE(&data[112]) == 400);
|
||||
ASSERT(swapUint64BE(&data[94]) == 400);
|
||||
// Events
|
||||
ASSERT(data[120] == 0b10010001); // empty
|
||||
ASSERT(readMPString(&data[121]) == "event1");
|
||||
ASSERT(data[128] == 0xcb);
|
||||
ASSERT(swapDoubleBE(&data[129]) == 100.101);
|
||||
ASSERT(data[102] == 0b10010001); // empty
|
||||
ASSERT(readMPString(&data[103]) == "event1");
|
||||
ASSERT(data[110] == 0xcb);
|
||||
ASSERT(swapDoubleBE(&data[111]) == 100.101);
|
||||
// Events Attributes
|
||||
ASSERT(data[137] == 0b10000001); // single k/v pair
|
||||
ASSERT(readMPString(&data[138]) == "foo");
|
||||
ASSERT(readMPString(&data[142]) == "bar");
|
||||
ASSERT(data[119] == 0b10000001); // single k/v pair
|
||||
ASSERT(readMPString(&data[120]) == "foo");
|
||||
ASSERT(readMPString(&data[124]) == "bar");
|
||||
// Attributes
|
||||
ASSERT(data[146] == 0b10000010); // two k/v pair
|
||||
ASSERT(data[128] == 0b10000010); // two k/v pair
|
||||
// Reconstruct map from MessagePack wire format data and verify.
|
||||
std::unordered_map<std::string, std::string> attributes;
|
||||
auto index = 147;
|
||||
auto index = 129;
|
||||
|
||||
auto firstKey = readMPString(&data[index]);
|
||||
index += firstKey.length() + 1; // +1 for control byte
|
||||
|
@ -797,11 +782,11 @@ TEST_CASE("/flow/Tracing/FastUDPMessagePackEncoding") {
|
|||
span4.location = location;
|
||||
tracer.serialize_span(span4, request);
|
||||
data = request.buffer.get();
|
||||
ASSERT(data[0] == 0b10011110); // 14 element array.
|
||||
// We don't care about the next 54 bytes as there is no parent and a randomly assigned Trace and SpanID
|
||||
ASSERT(data[0] == 0b10011100); // 12 element array.
|
||||
// We don't care about the next 36 bytes as there is no parent and a randomly assigned Trace and SpanID
|
||||
// Read and verify span name
|
||||
ASSERT(data[55] == 0xda);
|
||||
ASSERT(readMPString(&data[55]) == longString);
|
||||
ASSERT(data[37] == 0xda);
|
||||
ASSERT(readMPString(&data[37]) == longString);
|
||||
return Void();
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -195,6 +195,7 @@ ERROR( checkpoint_not_found, 2040, "Checkpoint not found" )
|
|||
ERROR( key_not_tuple, 2041, "The key cannot be parsed as a tuple" );
|
||||
ERROR( value_not_tuple, 2042, "The value cannot be parsed as a tuple" );
|
||||
ERROR( mapper_not_tuple, 2043, "The mapper cannot be parsed as a tuple" );
|
||||
ERROR( invalid_checkpoint_format, 2044, "Invalid checkpoint format" )
|
||||
|
||||
|
||||
ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" )
|
||||
|
|
|
@ -39,6 +39,17 @@ def is_port_in_use(port):
|
|||
|
||||
valid_letters_for_secret = string.ascii_letters + string.digits
|
||||
|
||||
class TLSConfig:
    """Plain holder for the TLS settings of a test cluster.

    Attributes:
        server_chain_len: length of the server certificate chain.
        client_chain_len: length of the client certificate chain.
        verify_peers: peer verification rule string.

    Passing a negative chain length generates an expired leaf certificate
    for that side.
    """

    def __init__(
        self,
        server_chain_len: int = 3,
        client_chain_len: int = 2,
        verify_peers = "Check.Valid=1",
    ):
        # Values are stored as given; the sign of a chain length is only
        # interpreted later, when the certificates are generated.
        (
            self.server_chain_len,
            self.client_chain_len,
            self.verify_peers,
        ) = (server_chain_len, client_chain_len, verify_peers)
|
||||
|
||||
def random_secret_string(length):
    """Return a random string of `length` characters drawn from valid_letters_for_secret."""
    picked = [random.choice(valid_letters_for_secret) for _ in range(length)]
    return "".join(picked)
|
||||
|
@ -67,11 +78,12 @@ cluster-file = {etcdir}/fdb.cluster
|
|||
## Default parameters for individual fdbserver processes
|
||||
[fdbserver]
|
||||
command = {fdbserver_bin}
|
||||
public-address = {ip_address}:$ID
|
||||
public-address = {ip_address}:$ID{optional_tls}
|
||||
listen-address = public
|
||||
datadir = {datadir}/$ID
|
||||
logdir = {logdir}
|
||||
{bg_knob_line}
|
||||
{tls_config}
|
||||
# logsize = 10MiB
|
||||
# maxlogssize = 100MiB
|
||||
# machine-id =
|
||||
|
@ -98,12 +110,15 @@ logdir = {logdir}
|
|||
port=None,
|
||||
ip_address=None,
|
||||
blob_granules_enabled: bool = False,
|
||||
redundancy: str = "single"
|
||||
redundancy: str = "single",
|
||||
tls_config: TLSConfig = None,
|
||||
mkcert_binary: str = "",
|
||||
):
|
||||
self.basedir = Path(basedir)
|
||||
self.etc = self.basedir.joinpath("etc")
|
||||
self.log = self.basedir.joinpath("log")
|
||||
self.data = self.basedir.joinpath("data")
|
||||
self.cert = self.basedir.joinpath("cert")
|
||||
self.conf_file = self.etc.joinpath("foundationdb.conf")
|
||||
self.cluster_file = self.etc.joinpath("fdb.cluster")
|
||||
self.fdbserver_binary = Path(fdbserver_binary)
|
||||
|
@ -137,11 +152,22 @@ logdir = {logdir}
|
|||
self.use_legacy_conf_syntax = False
|
||||
self.coordinators = set()
|
||||
self.active_servers = set(self.server_ports.keys())
|
||||
self.tls_config = tls_config
|
||||
self.mkcert_binary = Path(mkcert_binary)
|
||||
self.server_cert_file = self.cert.joinpath("server_cert.pem")
|
||||
self.client_cert_file = self.cert.joinpath("client_cert.pem")
|
||||
self.server_key_file = self.cert.joinpath("server_key.pem")
|
||||
self.client_key_file = self.cert.joinpath("client_key.pem")
|
||||
self.server_ca_file = self.cert.joinpath("server_ca.pem")
|
||||
self.client_ca_file = self.cert.joinpath("client_ca.pem")
|
||||
|
||||
if create_config:
|
||||
self.create_cluster_file()
|
||||
self.save_config()
|
||||
|
||||
if self.tls_config is not None:
|
||||
self.create_tls_cert()
|
||||
|
||||
def __next_port(self):
|
||||
if self.first_port is None:
|
||||
return get_free_port()
|
||||
|
@ -166,6 +192,8 @@ logdir = {logdir}
|
|||
logdir=self.log,
|
||||
ip_address=self.ip_address,
|
||||
bg_knob_line=bg_knob_line,
|
||||
tls_config=self.tls_conf_string(),
|
||||
optional_tls=":tls" if self.tls_config is not None else "",
|
||||
)
|
||||
)
|
||||
# By default, the cluster only has one process
|
||||
|
@ -190,11 +218,12 @@ logdir = {logdir}
|
|||
def create_cluster_file(self):
|
||||
with open(self.cluster_file, "x") as f:
|
||||
f.write(
|
||||
"{desc}:{secret}@{ip_addr}:{server_port}".format(
|
||||
"{desc}:{secret}@{ip_addr}:{server_port}{optional_tls}".format(
|
||||
desc=self.cluster_desc,
|
||||
secret=self.cluster_secret,
|
||||
ip_addr=self.ip_address,
|
||||
server_port=self.server_ports[0],
|
||||
optional_tls=":tls" if self.tls_config is not None else "",
|
||||
)
|
||||
)
|
||||
self.coordinators = {0}
|
||||
|
@ -248,6 +277,10 @@ logdir = {logdir}
|
|||
|
||||
def __fdbcli_exec(self, cmd, stdout, stderr, timeout):
    """Run a single fdbcli command against this cluster.

    The command is handed to fdbcli through --exec together with this
    cluster's cluster file. When a TLS configuration is set, the client
    certificate, client key and server CA file are passed as well.

    Returns the `stdout` attribute of the completed process.
    Raises AssertionError when fdbcli exits with a non-zero return code.
    """
    command = [self.fdbcli_binary, "-C", self.cluster_file, "--exec", cmd]
    if self.tls_config:
        command.extend(
            [
                "--tls-certificate-file", self.client_cert_file,
                "--tls-key-file", self.client_key_file,
                "--tls-ca-file", self.server_ca_file,
            ]
        )
    completed = subprocess.run(
        command,
        env=self.process_env(),
        stderr=stderr,
        stdout=stdout,
        timeout=timeout,
    )
    assert completed.returncode == 0, "fdbcli command {} failed with {}".format(cmd, completed.returncode)
    return completed.stdout
|
||||
|
@ -271,6 +304,46 @@ logdir = {logdir}
|
|||
if self.blob_granules_enabled:
|
||||
self.fdbcli_exec("blobrange start \\x00 \\xff")
|
||||
|
||||
# Generate and install test certificate chains and keys
|
||||
def create_tls_cert(self):
    """Generate the server and client certificate chains, keys and CA files.

    Invokes the mkcert binary with the output paths held on this object,
    after making sure the certificate directory exists. A negative chain
    length in the TLS configuration is passed on as its absolute value plus
    the matching --expire-*-cert flag.

    Raises AssertionError if TLS is not configured or the mkcert binary is
    missing, and subprocess.CalledProcessError if mkcert fails.
    """
    tls = self.tls_config
    assert tls is not None, "TLS not enabled"
    mkcert = self.mkcert_binary
    assert mkcert.exists() and mkcert.is_file(), "{} does not exist".format(mkcert)
    self.cert.mkdir(exist_ok=True)

    # Flag/value pairs in the order they are handed to mkcert.
    options = (
        ("--server-chain-length", abs(tls.server_chain_len)),
        ("--client-chain-length", abs(tls.client_chain_len)),
        ("--server-cert-file", self.server_cert_file),
        ("--client-cert-file", self.client_cert_file),
        ("--server-key-file", self.server_key_file),
        ("--client-key-file", self.client_key_file),
        ("--server-ca-file", self.server_ca_file),
        ("--client-ca-file", self.client_ca_file),
    )
    command = [str(mkcert)]
    for flag, value in options:
        command += [flag, str(value)]
    command.append("--print-args")

    # The sign of the configured chain length selects an expired leaf certificate.
    if tls.server_chain_len < 0:
        command.append("--expire-server-cert")
    if tls.client_chain_len < 0:
        command.append("--expire-client-cert")

    subprocess.run(command, check=True)
|
||||
|
||||
# Materialize server's TLS configuration section
|
||||
def tls_conf_string(self):
    """Render the TLS lines of the server configuration.

    Returns an empty string when no TLS configuration is set; otherwise one
    "key = value" line per TLS setting, joined with newlines.
    """
    if self.tls_config is None:
        return ""

    # Note: the server's tls-ca-file is set to the *client* CA file.
    entries = (
        ("tls-certificate-file", self.server_cert_file),
        ("tls-key-file", self.server_key_file),
        ("tls-ca-file", self.client_ca_file),
        ("tls-verify-peers", self.tls_config.verify_peers),
    )
    return "\n".join("{} = {}".format(key, value) for key, value in entries)
|
||||
|
||||
# Get cluster status using fdbcli
|
||||
def get_status(self):
|
||||
status_output = self.fdbcli_exec_and_get("status json")
|
||||
|
|
|
@ -5,25 +5,27 @@ import os
|
|||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from local_cluster import LocalCluster, random_secret_string
|
||||
from local_cluster import LocalCluster, TLSConfig, random_secret_string
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class TempCluster:
|
||||
class TempCluster(LocalCluster):
|
||||
def __init__(
|
||||
self,
|
||||
build_dir: str,
|
||||
process_number: int = 1,
|
||||
port: str = None,
|
||||
blob_granules_enabled: bool = False,
|
||||
tls_config: TLSConfig = None,
|
||||
):
|
||||
self.build_dir = Path(build_dir).resolve()
|
||||
assert self.build_dir.exists(), "{} does not exist".format(build_dir)
|
||||
assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
|
||||
tmp_dir = self.build_dir.joinpath("tmp", random_secret_string(16))
|
||||
tmp_dir.mkdir(parents=True)
|
||||
self.cluster = LocalCluster(
|
||||
self.tmp_dir = tmp_dir
|
||||
super().__init__(
|
||||
tmp_dir,
|
||||
self.build_dir.joinpath("bin", "fdbserver"),
|
||||
self.build_dir.joinpath("bin", "fdbmonitor"),
|
||||
|
@ -31,23 +33,21 @@ class TempCluster:
|
|||
process_number,
|
||||
port=port,
|
||||
blob_granules_enabled=blob_granules_enabled,
|
||||
tls_config=tls_config,
|
||||
mkcert_binary=self.build_dir.joinpath("bin", "mkcert"),
|
||||
)
|
||||
self.log = self.cluster.log
|
||||
self.etc = self.cluster.etc
|
||||
self.data = self.cluster.data
|
||||
self.tmp_dir = tmp_dir
|
||||
|
||||
def __enter__(self):
|
||||
self.cluster.__enter__()
|
||||
self.cluster.create_database()
|
||||
super().__enter__()
|
||||
super().create_database()
|
||||
return self
|
||||
|
||||
def __exit__(self, xc_type, exc_value, traceback):
|
||||
self.cluster.__exit__(xc_type, exc_value, traceback)
|
||||
super().__exit__(xc_type, exc_value, traceback)
|
||||
shutil.rmtree(self.tmp_dir)
|
||||
|
||||
def close(self):
|
||||
self.cluster.__exit__(None, None, None)
|
||||
super().__exit__(None, None, None)
|
||||
shutil.rmtree(self.tmp_dir)
|
||||
|
||||
|
||||
|
@ -94,12 +94,37 @@ if __name__ == "__main__":
|
|||
parser.add_argument(
|
||||
"--blob-granules-enabled", help="Enable blob granules", action="store_true"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tls-enabled", help="Enable TLS (with test-only certificates)", action="store_true")
|
||||
parser.add_argument(
|
||||
"--server-cert-chain-len",
|
||||
help="Length of server TLS certificate chain including root CA. Negative value deliberately generates expired leaf certificate for TLS testing. Only takes effect with --tls-enabled.",
|
||||
type=int,
|
||||
default=3,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--client-cert-chain-len",
|
||||
help="Length of client TLS certificate chain including root CA. Negative value deliberately generates expired leaf certificate for TLS testing. Only takes effect with --tls-enabled.",
|
||||
type=int,
|
||||
default=2,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tls-verify-peer",
|
||||
help="Rules to verify client certificate chain. See https://apple.github.io/foundationdb/tls.html#peer-verification",
|
||||
type=str,
|
||||
default="Check.Valid=1",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
# Build the TLS settings only when --tls-enabled was given; None means no TLS.
tls_config = None
if args.tls_enabled:
    tls_config = TLSConfig(
        server_chain_len=args.server_cert_chain_len,
        client_chain_len=args.client_cert_chain_len,
        # Forward --tls-verify-peer: the option was parsed but never handed
        # to TLSConfig, so it had no effect on the generated server config.
        verify_peers=args.tls_verify_peer,
    )
|
||||
errcode = 1
|
||||
with TempCluster(
|
||||
args.build_dir,
|
||||
args.process_number,
|
||||
blob_granules_enabled=args.blob_granules_enabled,
|
||||
tls_config=tls_config,
|
||||
) as cluster:
|
||||
print("log-dir: {}".format(cluster.log))
|
||||
print("etc-dir: {}".format(cluster.etc))
|
||||
|
@ -117,6 +142,18 @@ if __name__ == "__main__":
|
|||
cmd_args.append(str(cluster.etc))
|
||||
elif cmd == "@TMP_DIR@":
|
||||
cmd_args.append(str(cluster.tmp_dir))
|
||||
elif cmd == "@SERVER_CERT_FILE@":
|
||||
cmd_args.append(str(cluster.server_cert_file))
|
||||
elif cmd == "@SERVER_KEY_FILE@":
|
||||
cmd_args.append(str(cluster.server_key_file))
|
||||
elif cmd == "@SERVER_CA_FILE@":
|
||||
cmd_args.append(str(cluster.server_ca_file))
|
||||
elif cmd == "@CLIENT_CERT_FILE@":
|
||||
cmd_args.append(str(cluster.client_cert_file))
|
||||
elif cmd == "@CLIENT_KEY_FILE@":
|
||||
cmd_args.append(str(cluster.client_key_file))
|
||||
elif cmd == "@CLIENT_CA_FILE@":
|
||||
cmd_args.append(str(cluster.client_ca_file))
|
||||
elif cmd.startswith("@DATA_DIR@"):
|
||||
cmd_args.append(str(cluster.data) + cmd[len("@DATA_DIR@") :])
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue