Merge branch 'main' of https://github.com/apple/foundationdb into readaware

commit a28b0f4361
@@ -22,7 +22,7 @@ Contributing to FoundationDB can be in contributions to the code base, sharing y

### Binary downloads

-Developers interested in using FoundationDB can get started by downloading and installing a binary package. Please see the [downloads page](https://www.foundationdb.org/download/) for a list of available packages.
+Developers interested in using FoundationDB can get started by downloading and installing a binary package. Please see the [downloads page](https://github.com/apple/foundationdb/releases) for a list of available packages.

### Compiling from source

@@ -181,4 +181,4 @@ Under Windows, only Visual Studio with ClangCl is supported
1. `mkdir build && cd build`
1. `cmake -G "Visual Studio 16 2019" -A x64 -T ClangCl <PATH_TO_FOUNDATIONDB_SOURCE>`
1. `msbuild /p:Configuration=Release foundationdb.sln`
-1. To increase build performance, use `/p:UseMultiToolTask=true` and `/p:CL_MPCount=<NUMBER_OF_PARALLEL_JOBS>`
+1. To increase build performance, use `/p:UseMultiToolTask=true` and `/p:CL_MPCount=<NUMBER_OF_PARALLEL_JOBS>`

@@ -18,6 +18,8 @@ endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
set(cpu "aarch64")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le|powerpc64le)")
+set(cpu "ppc64le")
endif()

set(IS_ARM_MAC NO)
@@ -49,7 +51,7 @@ endif()
add_dependencies(fdb_c fdb_c_generated fdb_c_options)
add_dependencies(fdbclient fdb_c_options)
add_dependencies(fdbclient_sampling fdb_c_options)
-target_link_libraries(fdb_c PUBLIC $<BUILD_INTERFACE:fdbclient>)
+target_link_libraries(fdb_c PRIVATE $<BUILD_INTERFACE:fdbclient>)
if(APPLE)
set(symbols ${CMAKE_CURRENT_BINARY_DIR}/fdb_c.symbols)
add_custom_command(OUTPUT ${symbols}
@@ -121,9 +123,9 @@ if(NOT WIN32 AND NOT IS_ARM_MAC)
strip_debug_symbols(fdb_c_ryw_benchmark)
strip_debug_symbols(fdb_c_txn_size_test)
endif()
-target_link_libraries(fdb_c_performance_test PRIVATE fdb_c)
-target_link_libraries(fdb_c_ryw_benchmark PRIVATE fdb_c)
-target_link_libraries(fdb_c_txn_size_test PRIVATE fdb_c)
+target_link_libraries(fdb_c_performance_test PRIVATE fdb_c Threads::Threads)
+target_link_libraries(fdb_c_ryw_benchmark PRIVATE fdb_c Threads::Threads)
+target_link_libraries(fdb_c_txn_size_test PRIVATE fdb_c Threads::Threads)

add_dependencies(fdb_c_setup_tests doctest)
add_dependencies(fdb_c_unit_tests doctest)
@@ -134,14 +136,14 @@ if(NOT WIN32 AND NOT IS_ARM_MAC)
target_include_directories(fdb_c_unit_tests_version_510 PUBLIC ${DOCTEST_INCLUDE_DIR})
target_include_directories(disconnected_timeout_unit_tests PUBLIC ${DOCTEST_INCLUDE_DIR})
target_link_libraries(fdb_c_setup_tests PRIVATE fdb_c Threads::Threads)
-target_link_libraries(fdb_c_unit_tests PRIVATE fdb_c Threads::Threads)
+target_link_libraries(fdb_c_unit_tests PRIVATE fdb_c Threads::Threads fdbclient)
target_link_libraries(fdb_c_unit_tests_version_510 PRIVATE fdb_c Threads::Threads)
-target_link_libraries(trace_partial_file_suffix_test PRIVATE fdb_c Threads::Threads)
+target_link_libraries(trace_partial_file_suffix_test PRIVATE fdb_c Threads::Threads flow)
target_link_libraries(disconnected_timeout_unit_tests PRIVATE fdb_c Threads::Threads)

# do not set RPATH for mako
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
-target_link_libraries(mako PRIVATE fdb_c)
+target_link_libraries(mako PRIVATE fdb_c fdbclient)

if(NOT OPEN_FOR_IDE)
# Make sure that fdb_c.h is compatible with c90

@@ -59,9 +59,10 @@ def write_windows_asm(asmfile, functions):


def write_unix_asm(asmfile, functions, prefix):
-    if cpu != "aarch64":
+    if cpu != "aarch64" and cpu!= "ppc64le":
        asmfile.write(".intel_syntax noprefix\n")

+    i = 0
    if os == 'linux' or os == 'freebsd':
        asmfile.write("\n.data\n")
        for f in functions:
@@ -70,8 +71,13 @@ def write_unix_asm(asmfile, functions, prefix):
    if os == 'linux' or os == 'freebsd':
        asmfile.write("\n.text\n")
        for f in functions:
+            if cpu == "ppc64le":
+                asmfile.write("\n.LC%d:\n" % (i))
+                asmfile.write("\t.quad \tfdb_api_ptr_%s\n" % (f))
+                asmfile.write("\t.align 2\n")
+                i = i + 1
            asmfile.write("\t.global %s\n\t.type %s, @function\n" % (f, f))

    i = 0
    for f in functions:
        asmfile.write("\n.globl %s%s\n" % (prefix, f))
        if cpu == 'aarch64' and os == 'osx':
@@ -118,6 +124,46 @@ def write_unix_asm(asmfile, functions, prefix):
                assert False, '{} not supported for Arm yet'.format(os)
            asmfile.write("\tldr x8, [x8]\n")
            asmfile.write("\tbr x8\n")
+        elif cpu == "ppc64le":
+            asmfile.write("\n.LCF%d:\n" % (i))
+            asmfile.write("\taddis 2,12,.TOC.-.LCF%d@ha\n" % (i))
+            asmfile.write("\taddi 2,2,.TOC.-.LCF%d@l\n" % (i))
+            asmfile.write("\tmflr 0\n")
+            asmfile.write("\tstd 31, -8(1)\n")
+            asmfile.write("\tstd 0,16(1)\n")
+            asmfile.write("\tstdu 1,-192(1)\n")
+            #asmfile.write("\tstd 2,24(1)\n")
+            asmfile.write("\taddis 11,2,.LC%d@toc@ha\n" % (i))
+            asmfile.write("\tld 11,.LC%d@toc@l(11)\n" % (i))
+            asmfile.write("\tld 12,0(11)\n")
+            asmfile.write("\tstd 2,24(1)\n")
+            asmfile.write("\tlwa 11,344(1)\n")
+            asmfile.write("\tmtctr 12\n")
+            asmfile.write("\tstd 11,152(1)\n")
+            asmfile.write("\tlwa 11,352(1)\n")
+            asmfile.write("\tstd 11,160(1)\n")
+            asmfile.write("\tlwa 11,336(1)\n")
+            asmfile.write("\tstd 11,144(1)\n")
+            asmfile.write("\tlwa 11,328(1)\n")
+            asmfile.write("\tstd 11,136(1)\n")
+            asmfile.write("\tlwa 11,320(1)\n")
+            asmfile.write("\tstd 11,128(1)\n")
+            asmfile.write("\tlwa 11,312(1)\n")
+            asmfile.write("\tstd 11,120(1)\n")
+            asmfile.write("\tlwa 11,304(1)\n")
+            asmfile.write("\tstd 11,112(1)\n")
+            asmfile.write("\tld 11,296(1)\n")
+            asmfile.write("\tstd 11,104(1)\n")
+            asmfile.write("\tlwa 11,288(1)\n")
+            asmfile.write("\tstd 11,96(1)\n")
+            asmfile.write("\tbctrl\n")
+            asmfile.write("\tld 2,24(1)\n")
+            asmfile.write("\taddi 1,1,192\n")
+            asmfile.write("\tld 0,16(1)\n")
+            asmfile.write("\tld 31, -8(1)\n")
+            asmfile.write("\tmtlr 0\n")
+            asmfile.write("\tblr\n")
+            i = i + 1
        else:
            asmfile.write(
                "\tmov r11, qword ptr [%sfdb_api_ptr_%s@GOTPCREL+rip]\n" % (prefix, f))

@@ -1,17 +1,18 @@
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
+#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>

#if defined(__linux__)
#include <linux/limits.h>
@@ -615,7 +616,7 @@ int64_t granule_start_load(const char* filename,
// don't seek if offset == 0
if (offset && fseek(fp, offset, SEEK_SET)) {
// if fseek was non-zero, it failed
-fprintf(stderr, "ERROR: BG could not seek to %ld in file %s\n", offset, full_fname);
+fprintf(stderr, "ERROR: BG could not seek to %" PRId64 " in file %s\n", offset, full_fname);
fclose(fp);
return -1;
}
@@ -625,7 +626,7 @@ int64_t granule_start_load(const char* filename,
fclose(fp);

if (readSize != length) {
-fprintf(stderr, "ERROR: BG could not read %ld bytes from file: %s\n", length, full_fname);
+fprintf(stderr, "ERROR: BG could not read %" PRId64 " bytes from file: %s\n", length, full_fname);
return -1;
}

@@ -636,7 +637,7 @@ int64_t granule_start_load(const char* filename,
uint8_t* granule_get_load(int64_t loadId, void* userContext) {
BGLocalFileContext* context = (BGLocalFileContext*)userContext;
if (context->data_by_id[loadId] == 0) {
-fprintf(stderr, "ERROR: BG loadId invalid for get_load: %ld\n", loadId);
+fprintf(stderr, "ERROR: BG loadId invalid for get_load: %" PRId64 "\n", loadId);
return 0;
}
return context->data_by_id[loadId];
@@ -645,7 +646,7 @@ uint8_t* granule_get_load(int64_t loadId, void* userContext) {
void granule_free_load(int64_t loadId, void* userContext) {
BGLocalFileContext* context = (BGLocalFileContext*)userContext;
if (context->data_by_id[loadId] == 0) {
-fprintf(stderr, "ERROR: BG loadId invalid for free_load: %ld\n", loadId);
+fprintf(stderr, "ERROR: BG loadId invalid for free_load: %" PRId64 "\n", loadId);
}
free(context->data_by_id[loadId]);
context->data_by_id[loadId] = 0;
@@ -1119,7 +1120,7 @@ int run_workload(FDBTransaction* transaction,
if (tracetimer == dotrace) {
fdb_error_t err;
tracetimer = 0;
-snprintf(traceid, 32, "makotrace%019ld", total_xacts);
+snprintf(traceid, 32, "makotrace%019" PRId64, total_xacts);
fprintf(debugme, "DEBUG: txn tracing %s\n", traceid);
err = fdb_transaction_set_option(transaction,
FDB_TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER,
@@ -1283,7 +1284,7 @@ void* worker_thread(void* thread_args) {
}

fprintf(debugme,
-"DEBUG: worker_id:%d (%d) thread_id:%d (%d) database_index:%lu (tid:%lu)\n",
+"DEBUG: worker_id:%d (%d) thread_id:%d (%d) database_index:%lu (tid:%" PRIu64 ")\n",
worker_id,
args->num_processes,
thread_id,
@@ -1350,6 +1351,11 @@ void* worker_thread(void* thread_args) {
char str2[1000];
sprintf(str2, "%s%d", TEMP_DATA_STORE, *parent_id);
rc = mkdir(str2, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+if (rc < 0) {
+int ec = errno;
+fprintf(stderr, "Failed to make directory: %s because %s\n", str2, strerror(ec));
+goto failExit;
+}
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_COMMIT || op == OP_TRANSACTION) {
FILE* fp;
@@ -1357,6 +1363,11 @@ void* worker_thread(void* thread_args) {
strcat(file_name, str2);
get_stats_file_name(file_name, worker_id, thread_id, op);
fp = fopen(file_name, "w");
+if (!fp) {
+int ec = errno;
+fprintf(stderr, "Failed to open file: %s because %s\n", file_name, strerror(ec));
+goto failExit;
+}
lat_block_t* temp_block = ((thread_args_t*)thread_args)->block[op];
if (is_memory_allocated[op]) {
size = stats->latency_samples[op] / LAT_BLOCK_SIZE;
@@ -1376,11 +1387,11 @@ void* worker_thread(void* thread_args) {
fclose(fp);
}
}
__sync_fetch_and_add(stopcount, 1);
}

/* fall through */
failExit:
__sync_fetch_and_add(stopcount, 1);
for (op = 0; op < MAX_OP; op++) {
lat_block_t* curr = ((thread_args_t*)thread_args)->block[op];
lat_block_t* prev = NULL;
@@ -2240,9 +2251,9 @@ void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, s
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0) {
uint64_t ops_total_diff = ops_total[op] - ops_total_prev[op];
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", ops_total_diff);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", ops_total_diff);
if (fp) {
-fprintf(fp, "\"%s\": %lu,", get_ops_name(op), ops_total_diff);
+fprintf(fp, "\"%s\": %" PRIu64 ",", get_ops_name(op), ops_total_diff);
}
errors_diff[op] = errors_total[op] - errors_total_prev[op];
print_err = (errors_diff[op] > 0);
@@ -2270,7 +2281,7 @@ void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, s
printf("%" STR(STATS_TITLE_WIDTH) "s ", "Errors");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0) {
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", errors_diff[op]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", errors_diff[op]);
if (fp) {
fprintf(fp, ",\"errors\": %.2f", conflicts_diff);
}
@@ -2419,10 +2430,10 @@ void print_report(mako_args_t* args,
break;
}
}
-printf("Total Xacts: %8lu\n", totalxacts);
-printf("Total Conflicts: %8lu\n", conflicts);
-printf("Total Errors: %8lu\n", totalerrors);
-printf("Overall TPS: %8lu\n\n", totalxacts * 1000000000 / duration_nsec);
+printf("Total Xacts: %8" PRIu64 "\n", totalxacts);
+printf("Total Conflicts: %8" PRIu64 "\n", conflicts);
+printf("Total Errors: %8" PRIu64 "\n", totalerrors);
+printf("Overall TPS: %8" PRIu64 "\n\n", totalxacts * 1000000000 / duration_nsec);

if (fp) {
fprintf(fp, "\"results\": {");
@@ -2430,10 +2441,10 @@ void print_report(mako_args_t* args,
fprintf(fp, "\"totalProcesses\": %d,", args->num_processes);
fprintf(fp, "\"totalThreads\": %d,", args->num_threads);
fprintf(fp, "\"targetTPS\": %d,", args->tpsmax);
-fprintf(fp, "\"totalXacts\": %lu,", totalxacts);
-fprintf(fp, "\"totalConflicts\": %lu,", conflicts);
-fprintf(fp, "\"totalErrors\": %lu,", totalerrors);
-fprintf(fp, "\"overallTPS\": %lu,", totalxacts * 1000000000 / duration_nsec);
+fprintf(fp, "\"totalXacts\": %" PRIu64 ",", totalxacts);
+fprintf(fp, "\"totalConflicts\": %" PRIu64 ",", conflicts);
+fprintf(fp, "\"totalErrors\": %" PRIu64 ",", totalerrors);
+fprintf(fp, "\"overallTPS\": %" PRIu64 ",", totalxacts * 1000000000 / duration_nsec);
}

/* per-op stats */
@@ -2446,14 +2457,14 @@ void print_report(mako_args_t* args,
}
for (op = 0; op < MAX_OP; op++) {
if ((args->txnspec.ops[op][OP_COUNT] > 0 && op != OP_TRANSACTION) || op == OP_COMMIT) {
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", ops_total[op]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", ops_total[op]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), ops_total[op]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), ops_total[op]);
}
}
}
@@ -2475,14 +2486,14 @@ void print_report(mako_args_t* args,
first_op = 1;
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 && op != OP_TRANSACTION) {
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", errors_total[op]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", errors_total[op]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), errors_total[op]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), errors_total[op]);
}
}
}
@@ -2500,7 +2511,7 @@ void print_report(mako_args_t* args,
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
if (lat_total[op]) {
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", lat_samples[op]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", lat_samples[op]);
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
}
@@ -2510,7 +2521,7 @@ void print_report(mako_args_t* args,
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), lat_samples[op]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), lat_samples[op]);
}
}
}
@@ -2527,14 +2538,14 @@ void print_report(mako_args_t* args,
if (lat_min[op] == -1) {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
} else {
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", lat_min[op]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", lat_min[op]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), lat_min[op]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), lat_min[op]);
}
}
}
@@ -2550,14 +2561,14 @@ void print_report(mako_args_t* args,
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
if (lat_total[op]) {
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", lat_total[op] / lat_samples[op]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", lat_total[op] / lat_samples[op]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), lat_total[op] / lat_samples[op]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), lat_total[op] / lat_samples[op]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
@@ -2577,14 +2588,14 @@ void print_report(mako_args_t* args,
if (lat_max[op] == 0) {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
} else {
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", lat_max[op]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", lat_max[op]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), lat_max[op]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), lat_max[op]);
}
}
}
@@ -2635,14 +2646,14 @@ void print_report(mako_args_t* args,
} else {
median = (dataPoints[op][num_points[op] / 2] + dataPoints[op][num_points[op] / 2 - 1]) >> 1;
}
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", median);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", median);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), median);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), median);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
@@ -2665,14 +2676,14 @@ void print_report(mako_args_t* args,
}
if (lat_total[op]) {
point_95pct = ((float)(num_points[op]) * 0.95) - 1;
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", dataPoints[op][point_95pct]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", dataPoints[op][point_95pct]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), dataPoints[op][point_95pct]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), dataPoints[op][point_95pct]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
@@ -2695,14 +2706,14 @@ void print_report(mako_args_t* args,
}
if (lat_total[op]) {
point_99pct = ((float)(num_points[op]) * 0.99) - 1;
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", dataPoints[op][point_99pct]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", dataPoints[op][point_99pct]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), dataPoints[op][point_99pct]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), dataPoints[op][point_99pct]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
@@ -2725,14 +2736,14 @@ void print_report(mako_args_t* args,
}
if (lat_total[op]) {
point_99_9pct = ((float)(num_points[op]) * 0.999) - 1;
-printf("%" STR(STATS_FIELD_WIDTH) "lu ", dataPoints[op][point_99_9pct]);
+printf("%" STR(STATS_FIELD_WIDTH) PRIu64 " ", dataPoints[op][point_99_9pct]);
if (fp) {
if (first_op) {
first_op = 0;
} else {
fprintf(fp, ",");
}
-fprintf(fp, "\"%s\": %lu", get_ops_name(op), dataPoints[op][point_99_9pct]);
+fprintf(fp, "\"%s\": %" PRIu64, get_ops_name(op), dataPoints[op][point_99_9pct]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");

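Most of the mako hunks above replace hard-coded `%ld`/`%lu` with the `<inttypes.h>` format macros. A minimal sketch of why, separate from mako itself (the variable names below are illustrative, not from the benchmark): `int64_t` is `long` on LP64 Linux but `long long` on LLP64 platforms such as 64-bit Windows, so a literal `%ld` only works by accident, while `PRId64`/`PRIu64` always expand to the specifier that matches the typedef.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    int64_t total_xacts = 1234567890123LL; /* hypothetical counters, for illustration only */
    uint64_t conflicts = 42;

    /* Non-portable: assumes int64_t == long, which breaks on LLP64 targets. */
    /* printf("Total Xacts: %8ld\n", total_xacts); */

    /* Portable: the macros expand to the correct length modifier for this platform. */
    printf("Total Xacts:     %8" PRId64 "\n", total_xacts);
    printf("Total Conflicts: %8" PRIu64 "\n", conflicts);
    return 0;
}
```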
@@ -67,25 +67,25 @@ void runTests(struct ResultSet* rs) {
fdb_transaction_set(tr, keys[i], KEY_SIZE, valueStr, VALUE_SIZE);
e = getSize(rs, tr, sizes + i);
checkError(e, "transaction get size", rs);
-printf("size %d: %ld\n", i, sizes[i]);
+printf("size %d: %" PRId64 "\n", i, sizes[i]);
i++;

fdb_transaction_set(tr, keys[i], KEY_SIZE, valueStr, VALUE_SIZE);
e = getSize(rs, tr, sizes + i);
checkError(e, "transaction get size", rs);
-printf("size %d: %ld\n", i, sizes[i]);
+printf("size %d: %" PRId64 "\n", i, sizes[i]);
i++;

fdb_transaction_clear(tr, keys[i], KEY_SIZE);
e = getSize(rs, tr, sizes + i);
checkError(e, "transaction get size", rs);
-printf("size %d: %ld\n", i, sizes[i]);
+printf("size %d: %" PRId64 "\n", i, sizes[i]);
i++;

fdb_transaction_clear_range(tr, keys[i], KEY_SIZE, keys[i + 1], KEY_SIZE);
e = getSize(rs, tr, sizes + i);
checkError(e, "transaction get size", rs);
-printf("size %d: %ld\n", i, sizes[i]);
+printf("size %d: %" PRId64 "\n", i, sizes[i]);
i++;

for (j = 0; j + 1 < i; j++) {

@@ -18,6 +18,7 @@ set(SRCS

add_flow_target(STATIC_LIBRARY NAME fdb_flow SRCS ${SRCS})
target_link_libraries(fdb_flow PUBLIC fdb_c)
+target_link_libraries(fdb_flow PUBLIC fdbclient)
target_include_directories(fdb_flow PUBLIC
"${CMAKE_CURRENT_BINARY_DIR}"
"${CMAKE_CURRENT_SOURCE_DIR}"

@@ -154,6 +154,7 @@ endif()
set_target_properties(java_workloads PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/share/foundationdb")
target_link_libraries(java_workloads PUBLIC fdb_c ${JNI_LIBRARIES})
+target_link_libraries(java_workloads PRIVATE flow) # mostly for boost
target_include_directories(java_workloads PUBLIC ${JNI_INCLUDE_DIRS})

set(CMAKE_JAVA_COMPILE_FLAGS "-source" "1.8" "-target" "1.8" "-XDignore.symbol.file")
@@ -228,6 +229,8 @@ if(NOT OPEN_FOR_IDE)
else()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(lib_destination "linux/aarch64")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
+set(lib_destination "linux/ppc64le")
else()
set(lib_destination "linux/amd64")
endif()

@@ -182,7 +182,7 @@ public class JNIUtil {
private static OS getRunningOS() {
String osname = System.getProperty("os.name").toLowerCase();
String arch = System.getProperty("os.arch");
-if (!arch.equals("amd64") && !arch.equals("x86_64") && !arch.equals("aarch64")) {
+if (!arch.equals("amd64") && !arch.equals("x86_64") && !arch.equals("aarch64") && !arch.equals("ppc64le")) {
throw new IllegalStateException("Unknown or unsupported arch: " + arch);
}
if (osname.startsWith("windows")) {

@@ -219,7 +219,7 @@ else()
endif()
if(STATIC_LINK_LIBCXX)
if (NOT USE_LIBCXX AND NOT APPLE)
-add_link_options(-static-libstdc++ -static-libgcc)
+add_link_options(-static-libstdc++ -static-libgcc)
endif()
endif()
# # Instruction sets we require to be supported by the CPU
@@ -309,7 +309,7 @@ else()
if (PROFILE_INSTR_GENERATE)
message(FATAL_ERROR "Can't set both PROFILE_INSTR_GENERATE and PROFILE_INSTR_USE")
endif()
-add_compile_options(-Wno-error=profile-instr-out-of-date)
+add_compile_options(-Wno-error=profile-instr-out-of-date -Wno-error=profile-instr-unprofiled)
add_compile_options(-fprofile-instr-use=${PROFILE_INSTR_USE})
add_link_options(-fprofile-instr-use=${PROFILE_INSTR_USE})
endif()
@@ -349,6 +349,9 @@ else()
add_compile_options(-march=armv8.2-a+crc+simd)
endif()

+if (CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
+add_compile_options(-m64 -mcpu=power9 -mtune=power9 -DNO_WARN_X86_INTRINSICS)
+endif()
# Check whether we can use dtrace probes
include(CheckSymbolExists)
check_symbol_exists(DTRACE_PROBE sys/sdt.h SUPPORT_DTRACE)

@@ -303,7 +303,9 @@ set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
"/etc/rc.d/init.d"
"/usr/lib/pkgconfig"
"/usr/lib/foundationdb"
-"/usr/lib/cmake")
+"/usr/lib/cmake"
+"/usr/lib/foundationdb-${FDB_VERSION}/etc/foundationdb"
+)
set(CPACK_RPM_DEBUGINFO_PACKAGE ${GENERATE_DEBUG_PACKAGES})
#set(CPACK_RPM_BUILD_SOURCE_FDB_INSTALL_DIRS_PREFIX /usr/src)
set(CPACK_RPM_COMPONENT_INSTALL ON)

@@ -1,5 +1,6 @@
add_subdirectory(fmt-8.0.1)
if(NOT WIN32)
+add_subdirectory(debug_determinism)
add_subdirectory(monitoring)
add_subdirectory(TraceLogHelper)
add_subdirectory(TestHarness)

@@ -0,0 +1,5 @@
add_library(debug_determinism STATIC debug_determinism.cpp)

# So that we can link to libfdb_c.so. Not strictly necessary but convenient for use with our
# TRACE_PC_GUARD_INSTRUMENTATION_LIB cmake option
target_compile_options(debug_determinism PRIVATE -fPIC)

@@ -0,0 +1,45 @@
Utilities for debugging unseed mismatches for foundationdb simulation tests.

99/100 times the source of the nondeterminism is use of uninitialized memory and
what you want to do is build with `-DUSE_VALGRIND=ON` and run simulations under
valgrind.

Common sources of nondeterminism and specialized tools to find them.
1. Use of uninitialized memory (use valgrind!)
1. Memory errors (use valgrind and/or asan)
1. Undefined behavior (use ubsan. You can also try _GLIBCXX_DEBUG)

If it's not any of these then now it's time to try this technique. Look for

1. Call to some kind of "get current time" function that's not in `INetwork`
1. Depending on the relative ordering of allocated memory. E.g. Using heap-allocated pointers as keys in a `std::map`.
1. Inspecting something about the current state of the system (e.g. free disk space)
1. Depending on iteration order of an unordered map

# Quickstart

Set these cmake flags

```
-DTRACE_PC_GUARD_INSTRUMENTATION_LIB=$BUILDDIR/lib/libdebug_determinism.a
```

and change `#define DEBUG_DETERMINISM 0` to `#define DEBUG_DETERMINISM 1` in
flow/Platform.h. This disables several known sources of nondeterminism that
don't affect unseeds.

For reasons I don't fully understand, it appears that sqlite exhibits some
nondeterminism if you don't add `#define SQLITE_OMIT_LOOKASIDE` to the top of
fdbserver/sqlite/sqlite3.amalgamation.c, so you probably want to do that too.

Now when you run an fdbserver simulation, it will write a file `out.bin` in the
current directory which contains the sequence of edges in the control flow graph
that were encountered during the simulation. If you rename `out.bin` to `in.bin`
and then re-run, the simulation will validate that the sequence of edges is the
same as the last run. If it's not, then the simulation will enter an infinite
loop at the first difference and print a message. Then you probably want to
attach gdb to the process and investigate from there.

You'll need to make sure you delete the `simfdb` folder before each run, because
otherwise you'll take a different codepath for deleting the `simfdb` folder at
the beginning of simulation.

@@ -0,0 +1,52 @@
#include <stdint.h>
#include <stdio.h>

namespace {
FILE* out = nullptr;
FILE* in = nullptr;
void loop_forever() {
    // Try to convince the optimizer not to optimize away this loop
    static volatile uint64_t x = 0;
    for (;;) {
        ++x;
    }
}
} // namespace

// This callback is inserted by the compiler as a module constructor
// into every DSO. 'start' and 'stop' correspond to the
// beginning and end of the section with the guards for the entire
// binary (executable or DSO). The callback will be called at least
// once per DSO and may be called multiple times with the same parameters.
extern "C" void __sanitizer_cov_trace_pc_guard_init(uint32_t* start, uint32_t* stop) {
    in = fopen("in.bin", "r");
    out = fopen("out.bin", "w");
    static uint64_t N; // Counter for the guards.
    if (start == stop || *start)
        return; // Initialize only once.
    for (uint32_t* x = start; x < stop; x++) {
        *x = ++N; // Guards should start from 1.
    }
}

// This callback is inserted by the compiler on every edge in the
// control flow (some optimizations apply).
// Typically, the compiler will emit the code like this:
// if(*guard)
//     __sanitizer_cov_trace_pc_guard(guard);
// But for large functions it will emit a simple call:
//     __sanitizer_cov_trace_pc_guard(guard);
extern "C" void __sanitizer_cov_trace_pc_guard(uint32_t* guard) {
    if (!guard) {
        return;
    }
    fwrite(guard, 1, sizeof(*guard), out);
    if (in) {
        uint32_t theirs;
        fread(&theirs, 1, sizeof(theirs), in);
        if (*guard != theirs) {
            printf("Non-determinism detected\n");
            loop_forever();
        }
    }
}

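The two callbacks in this new file are the hooks documented for Clang's SanitizerCoverage: any translation unit compiled with `-fsanitize-coverage=trace-pc-guard` reports every control-flow edge to them. As a rough, self-contained sketch of the mechanism outside FoundationDB's build (the file names and build commands below are illustrative assumptions, not part of the commit):

```c
/* trace_pc_guard_demo.c - toy consumer of the callbacks defined above.
 * Assumed build, with only the demo translation unit instrumented:
 *   clang++ -c debug_determinism.cpp                       # callbacks, not instrumented
 *   clang -fsanitize-coverage=trace-pc-guard -c trace_pc_guard_demo.c
 *   clang++ trace_pc_guard_demo.o debug_determinism.o -o demo
 * Each run writes the visited edge IDs to out.bin; copying out.bin to in.bin
 * and re-running makes the guard callback compare the two executions edge by edge. */
#include <stdio.h>

static int collatz_step(int n) {
    /* The two branches here become two distinct guarded edges. */
    return (n % 2 == 0) ? n / 2 : 3 * n + 1;
}

int main(void) {
    int n = 27;
    while (n != 1) {
        n = collatz_step(n);
    }
    printf("reached 1\n");
    return 0;
}
```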
@@ -0,0 +1,37 @@
#!/bin/bash
if [ $# -eq 0 ] || [ $# -gt 2 ]
then
echo "Usage: generate_profile.sh Path_Of_Foundation_Build_Directory Storage_Engine"
exit 1
fi
fdbdir=$1
storage_engine='ssd'
if [ $# -eq 2 ]
then
storage_engine=$2
fi

export LD_LIBRARY_PATH=$fdbdir/lib:$LD_LIBRARY_PATH
export FDB_CLUSTER_FILE=$fdbdir/fdb.cluster
export LLVM_PROFILE_FILE=$fdbdir/sandbox/fdb-%p.profraw
$fdbdir/bin/fdbmonitor --conffile $fdbdir/sandbox/foundationdb.conf --lockfile $fdbdir/sandbox/fdbmonitor.pid &
# This profile will be ignored
export LLVM_PROFILE_FILE=$fdbdir/sandbox/cli-%m.profraw
$fdbdir/bin/fdbcli -C $fdbdir/fdb.cluster --exec "configure new $storage_engine single"
export LLVM_PROFILE_FILE=$fdbdir/sandbox/mako-build-%m.profraw
$fdbdir/bin/mako -p 64 -t 1 --keylen 32 --vallen 16 --mode build --rows 10000 --trace --trace_format json
export LLVM_PROFILE_FILE=$fdbdir/sandbox/mako-run-%m.profraw
$fdbdir/bin/mako -p 1 -t 2 --keylen 32 --vallen 16 --mode run --rows 10000 --transaction grvg7i2gr1:48cr1:48 --seconds 60 --trace $fdbdir/sandbox/logs --trace_format json

# Shutdown fdbserver to trigger profile dumping
fdbmonitor_pid=$(cat $fdbdir/sandbox/fdbmonitor.pid)
fdbserver_pid=$(cat /proc/$fdbmonitor_pid/task/$fdbmonitor_pid/children)
gdb --batch --eval-command 'call (void)exit(0)' --pid $fdbserver_pid

# Clean up
kill -9 $fdbmonitor_pid

# Profile for server
llvm-profdata merge -output=$fdbdir/fdb.profdata $fdbdir/sandbox/fdb-*.profraw
# Profile for client
llvm-profdata merge -output=$fdbdir/mako.profdata $fdbdir/sandbox/mako-*.profraw

@@ -147,7 +147,7 @@ def centos_image_with_fdb_helper(versioned: bool) -> Iterator[Optional[Image]]:
    container = None
    image = None
    try:
-        container = Container("centos", initd=True)
+        container = Container("centos:7", initd=True)
        for rpm in rpms:
            container.copy_to(rpm, "/opt")
        container.run(["bash", "-c", "yum update -y"])
@@ -237,10 +237,6 @@ def test_write(linux_container: Container, snapshot):
    assert snapshot == linux_container.run(["fdbcli", "--exec", "get x"])


-def test_fdbcli_help_text(linux_container: Container, snapshot):
-    assert snapshot == linux_container.run(["fdbcli", "--help"])


def test_execstack_permissions_libfdb_c(linux_container: Container, snapshot):
    linux_container.run(["ldconfig"])
    assert snapshot == linux_container.run(

@@ -2,8 +2,16 @@
Release Notes
#############

+6.3.24
+======
+* Fixed a bug where get key location can overload proxies. `(PR #6453) <https://github.com/apple/foundationdb/pull/6453>`_
+* Added a mechanism that can reduce the number of empty peek reply by not always returning empty peek reply immediately. `(PR #6413) <https://github.com/apple/foundationdb/pull/6413>`_
+* Enable TLS support for Windows. `(PR #6193) <https://github.com/apple/foundationdb/pull/6193>`_
+* Fixed a bug where a shard gets merged too soon. `(PR #6115) <https://github.com/apple/foundationdb/pull/6115>`_
+
6.3.23
======
* Add AWS v4 header support for backup. `(PR #6025) <https://github.com/apple/foundationdb/pull/6025>`_
* Fixed a bug that remoteDCIsHealthy logic is not guarded by CC_ENABLE_WORKER_HEALTH_MONITOR, which may prevent HA failback. `(PR #6106) <https://github.com/apple/foundationdb/pull/6106>`_
* Fixed a race condition with updating the coordinated state and updating the master registration. `(PR #6088) <https://github.com/apple/foundationdb/pull/6088>`_
* Changed dbinfo broadcast to be explicitly requested by the worker registration message. `(PR #6073) <https://github.com/apple/foundationdb/pull/6073>`_

@@ -19,6 +19,7 @@
 * limitations under the License.
 */

+#include "contrib/fmt-8.0.1/include/fmt/format.h"
#include "flow/flow.h"
#include "flow/Platform.h"
#include "flow/DeterministicRandom.h"
@@ -413,7 +414,7 @@ ACTOR Future<Void> logThroughput(int64_t* v, Key* next) {
loop {
state int64_t last = *v;
wait(delay(1));
-printf("throughput: %ld bytes/s, next: %s\n", *v - last, printable(*next).c_str());
+fmt::print("throughput: {} bytes/s, next: {}\n", *v - last, printable(*next).c_str());
}
}

@@ -1690,7 +1690,7 @@ ACTOR Future<Void> cleanupStatus(Reference<ReadYourWritesTransaction> tr,
readMore = true;
} catch (Error& e) {
// If doc can't be parsed or isn't alive, delete it.
-TraceEvent(SevWarn, "RemovedDeadBackupLayerStatus").detail("Key", docs[i].key).error(e, true);
+TraceEvent(SevWarn, "RemovedDeadBackupLayerStatus").errorUnsuppressed(e).detail("Key", docs[i].key);
tr->clear(docs[i].key);
// If limit is 1 then read more.
if (limit == 1)
@@ -2754,7 +2754,7 @@ ACTOR Future<Void> queryBackup(const char* name,
reportBackupQueryError(operationId,
result,
errorMessage =
-format("the specified restorable version %ld is not valid", restoreVersion));
+format("the specified restorable version %lld is not valid", restoreVersion));
return Void();
}
Optional<RestorableFileSet> fileSet = wait(bc->getRestoreSet(restoreVersion, keyRangesFilter));
@@ -3081,7 +3081,7 @@ static void addKeyRange(std::string optionValue, Standalone<VectorRef<KeyRangeRe

// Too many keys
default:
-fprintf(stderr, "ERROR: Invalid key range identified with %ld keys", tokens.size());
+fmt::print(stderr, "ERROR: Invalid key range identified with {} keys", tokens.size());
throw invalid_option_value();
break;
}
@@ -3887,9 +3887,9 @@ int main(int argc, char* argv[]) {
} else {
fprintf(stderr, "ERROR: Failed to set knob option '%s': %s\n", knobName.c_str(), e.what());
TraceEvent(SevError, "FailedToSetKnob")
+    .error(e)
    .detail("Knob", printable(knobName))
-    .detail("Value", printable(knobValueString))
-    .error(e);
+    .detail("Value", printable(knobValueString));
throw;
}
}

@@ -19,7 +19,7 @@
 */

#include "boost/lexical_cast.hpp"
#include "contrib/fmt-8.0.1/include/fmt/format.h"
#include "fdbcli/fdbcli.actor.h"

#include "fdbclient/IClientApi.h"
@@ -40,7 +40,7 @@ ACTOR Future<bool> advanceVersionCommandActor(Reference<IDatabase> db, std::vect
} else {
state Version v;
int n = 0;
-if (sscanf(tokens[1].toString().c_str(), "%ld%n", &v, &n) != 1 || n != tokens[1].size()) {
+if (sscanf(tokens[1].toString().c_str(), "%" PRId64 "%n", &v, &n) != 1 || n != tokens[1].size()) {
printUsage(tokens[0]);
return false;
} else {
@@ -53,7 +53,7 @@ ACTOR Future<bool> advanceVersionCommandActor(Reference<IDatabase> db, std::vect
tr->set(advanceVersionSpecialKey, boost::lexical_cast<std::string>(v));
wait(safeThreadFutureToFuture(tr->commit()));
} else {
-printf("Current read version is %ld\n", rv);
+fmt::print("Current read version is {}\n", rv);
return true;
}
} catch (Error& e) {

@@ -115,7 +115,7 @@ ACTOR Future<bool> changeFeedCommandActor(Database localDb, std::vector<StringRe
Version end = std::numeric_limits<Version>::max();
if (tokens.size() > 3) {
int n = 0;
-if (sscanf(tokens[3].toString().c_str(), "%ld%n", &begin, &n) != 1 || n != tokens[3].size()) {
+if (sscanf(tokens[3].toString().c_str(), "%" PRId64 "%n", &begin, &n) != 1 || n != tokens[3].size()) {
printUsage(tokens[0]);
return false;
}
@@ -168,7 +168,7 @@ ACTOR Future<bool> changeFeedCommandActor(Database localDb, std::vector<StringRe
}
Version v;
int n = 0;
-if (sscanf(tokens[3].toString().c_str(), "%ld%n", &v, &n) != 1 || n != tokens[3].size()) {
+if (sscanf(tokens[3].toString().c_str(), "%" PRId64 "%n", &v, &n) != 1 || n != tokens[3].size()) {
printUsage(tokens[0]);
return false;
} else {

@@ -176,7 +176,7 @@ ACTOR Future<bool> configureCommandActor(Reference<IDatabase> db,
case ConfigurationResult::STORAGE_MIGRATION_DISABLED:
fprintf(stderr,
"ERROR: Storage engine type cannot be changed because "
-"storage_migration_mode=disabled.\n");
+"storage_migration_type=disabled.\n");
fprintf(stderr,
"Type `configure perpetual_storage_wiggle=1 storage_migration_type=gradual' to enable gradual "
"migration with the perpetual wiggle, or `configure "

@@ -65,13 +65,14 @@ ACTOR Future<bool> changeCoordinators(Reference<IDatabase> db, std::vector<Strin
state StringRef new_cluster_description;
state std::string auto_coordinators_str;
StringRef nameTokenBegin = LiteralStringRef("description=");
-for (auto tok = tokens.begin() + 1; tok != tokens.end(); ++tok)
+for (auto tok = tokens.begin() + 1; tok != tokens.end(); ++tok) {
if (tok->startsWith(nameTokenBegin)) {
new_cluster_description = tok->substr(nameTokenBegin.size());
std::copy(tok + 1, tokens.end(), tok);
tokens.resize(tokens.size() - 1);
break;
}
}

state bool automatic = tokens.size() == 2 && tokens[1] == LiteralStringRef("auto");
state Reference<ITransaction> tr = db->createTransaction();
@@ -96,17 +97,32 @@ ACTOR Future<bool> changeCoordinators(Reference<IDatabase> db, std::vector<Strin
tr->set(fdb_cli::coordinatorsProcessSpecialKey, auto_coordinators_str);
} else if (tokens.size() > 1) {
state std::set<NetworkAddress> new_coordinators_addresses;
-state std::vector<std::string> newAddresslist;
+state std::set<Hostname> new_coordinators_hostnames;
+state std::vector<std::string> newCoordinatorslist;
state std::vector<StringRef>::iterator t;
for (t = tokens.begin() + 1; t != tokens.end(); ++t) {
try {
-auto const& addr = NetworkAddress::parse(t->toString());
-if (new_coordinators_addresses.count(addr)) {
-fprintf(stderr, "ERROR: passed redundant coordinators: `%s'\n", addr.toString().c_str());
-return true;
+if (Hostname::isHostname(t->toString())) {
+// We do not resolve hostnames here. We commit them as is.
+const auto& hostname = Hostname::parse(t->toString());
+if (new_coordinators_hostnames.count(hostname)) {
+fprintf(stderr,
+"ERROR: passed redundant coordinators: `%s'\n",
+hostname.toString().c_str());
+return true;
+}
+new_coordinators_hostnames.insert(hostname);
+newCoordinatorslist.push_back(hostname.toString());
+} else {
+const auto& addr = NetworkAddress::parse(t->toString());
+if (new_coordinators_addresses.count(addr)) {
+fprintf(
+stderr, "ERROR: passed redundant coordinators: `%s'\n", addr.toString().c_str());
+return true;
+}
+new_coordinators_addresses.insert(addr);
+newCoordinatorslist.push_back(addr.toString());
+}
}
-new_coordinators_addresses.insert(addr);
-newAddresslist.push_back(addr.toString());
} catch (Error& e) {
if (e.code() == error_code_connection_string_invalid) {
fprintf(
@@ -116,12 +132,12 @@ ACTOR Future<bool> changeCoordinators(Reference<IDatabase> db, std::vector<Strin
throw;
}
}
-std::string new_addresses_str = boost::algorithm::join(newAddresslist, ", ");
-tr->set(fdb_cli::coordinatorsProcessSpecialKey, new_addresses_str);
+std::string new_coordinators_str = boost::algorithm::join(newCoordinatorslist, ", ");
+tr->set(fdb_cli::coordinatorsProcessSpecialKey, new_coordinators_str);
}
wait(safeThreadFutureToFuture(tr->commit()));
-// commit should always fail here
-// if coordinators are changed, we should get commit_unknown() error
+// If the commit succeeds, the coordinators change and the commit will fail with commit_unknown_result().
ASSERT(false);
} catch (Error& e) {
state Error err(e);

@@ -59,7 +59,7 @@ ACTOR Future<Void> includeLocalities(Reference<IDatabase> db,
wait(safeThreadFutureToFuture(tr->commit()));
return Void();
} catch (Error& e) {
-TraceEvent("IncludeLocalitiesError").error(e, true);
+TraceEvent("IncludeLocalitiesError").errorUnsuppressed(e);
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
@@ -99,7 +99,7 @@ ACTOR Future<Void> includeServers(Reference<IDatabase> db, std::vector<AddressEx
wait(safeThreadFutureToFuture(tr->commit()));
return Void();
} catch (Error& e) {
-TraceEvent("IncludeServersError").error(e, true);
+TraceEvent("IncludeServersError").errorUnsuppressed(e);
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}

@@ -705,12 +705,12 @@ void printStatus(StatusObjectReader statusObj,
}
}
outputString += format(
-"    %s log epoch: %ld begin: %ld end: %s, missing "
+"    %s log epoch: %lld begin: %lld end: %s, missing "
"log interfaces(id,address): %s\n",
current ? "Current" : "Old",
epoch,
beginVersion,
-endVersion == invalidVersion ? "(unknown)" : format("%ld", endVersion).c_str(),
+endVersion == invalidVersion ? "(unknown)" : format("%lld", endVersion).c_str(),
missing_log_interfaces.c_str());
}
}

@@ -1014,9 +1014,9 @@ struct CLIOptions {
} else {
fprintf(stderr, "ERROR: Failed to set knob option '%s': %s\n", knobName.c_str(), e.what());
TraceEvent(SevError, "FailedToSetKnob")
+    .error(e)
    .detail("Knob", printable(knobName))
-    .detail("Value", printable(knobValueString))
-    .error(e);
+    .detail("Value", printable(knobValueString));
exit_code = FDB_EXIT_ERROR;
}
}
@@ -1157,7 +1157,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {

state bool writeMode = false;

state std::string clusterConnectString;
state std::map<Key, std::pair<Value, ClientLeaderRegInterface>> address_interface;

state FdbOptions globalOptions;
@@ -1171,6 +1170,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
try {
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
+wait(ccf->resolveHostnames());
} catch (Error& e) {
fprintf(stderr, "%s\n", ClusterConnectionFile::getErrorString(resolvedClusterFile, e).c_str());
return 1;
@@ -1615,7 +1615,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
} else {
Version v = wait(makeInterruptable(
safeThreadFutureToFuture(getTransaction(db, tr, options, intrans)->getReadVersion())));
-printf("%ld\n", v);
+fmt::print("{}\n", v);
}
continue;
}

@@ -28,6 +28,7 @@

#include "fdbclient/CoordinationInterface.h"

// Determine public IP address by calling the first coordinator.
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
try {
using namespace boost::asio;
@@ -35,6 +36,7 @@ IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
io_service ioService;
ip::udp::socket socket(ioService);

+ccs.resolveHostnamesBlocking();
const auto& coordAddr = ccs.coordinators()[0];
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
                                         : ip::address(ip::address_v4(coordAddr.ip.toV4()));

@@ -305,9 +305,9 @@ Reference<IBackupContainer> IBackupContainer::openContainer(const std::string& u
throw;

TraceEvent m(SevWarn, "BackupContainer");
+m.error(e);
m.detail("Description", "Invalid container specification.  See help.");
m.detail("URL", url);
-m.error(e);
if (e.code() == error_code_backup_invalid_url)
m.detail("LastOpenError", lastOpenError);

@@ -360,10 +360,9 @@ ACTOR Future<std::vector<std::string>> listContainers_impl(std::string baseURL)
throw;

TraceEvent m(SevWarn, "BackupContainer");

+m.error(e);
m.detail("Description", "Invalid backup container URL prefix.  See help.");
m.detail("URL", baseURL);
-m.error(e);
if (e.code() == error_code_backup_invalid_url)
m.detail("LastOpenError", IBackupContainer::lastOpenError);

@@ -1149,8 +1149,8 @@ public:
keyFile = _keyFile;
} catch (Error& e) {
TraceEvent(SevWarnAlways, "FailedToOpenEncryptionKeyFile")
-    .detail("FileName", encryptionKeyFileName)
-    .error(e);
+    .error(e)
+    .detail("FileName", encryptionKeyFileName);
throw e;
}
int bytesRead = wait(keyFile->read(cipherKey->data(), cipherKey->size(), 0));
@@ -1377,8 +1377,8 @@ ACTOR static Future<KeyRange> getSnapshotFileKeyRange_impl(Reference<BackupConta
e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) {
// blob http request failure, retry
TraceEvent(SevWarnAlways, "BackupContainerGetSnapshotFileKeyRangeConnectionFailure")
-    .detail("Retries", ++readFileRetries)
-    .error(e);
+    .error(e)
+    .detail("Retries", ++readFileRetries);
wait(delayJittered(0.1));
} else {
TraceEvent(SevError, "BackupContainerGetSnapshotFileKeyRangeUnexpectedError").error(e);
@@ -1549,9 +1549,9 @@ Reference<BackupContainerFileSystem> BackupContainerFileSystem::openContainerFS(
throw;

TraceEvent m(SevWarn, "BackupContainer");
+m.error(e);
m.detail("Description", "Invalid container specification.  See help.");
m.detail("URL", url);
-m.error(e);
if (e.code() == error_code_backup_invalid_url)
m.detail("LastOpenError", lastOpenError);

@@ -86,6 +86,8 @@ void ClientKnobs::initialize(Randomize randomize) {

init( LOCATION_CACHE_EVICTION_SIZE, 600000 );
init( LOCATION_CACHE_EVICTION_SIZE_SIM, 10 ); if( randomize && BUGGIFY ) LOCATION_CACHE_EVICTION_SIZE_SIM = 3;
+init( LOCATION_CACHE_ENDPOINT_FAILURE_GRACE_PERIOD, 60 );
+init( LOCATION_CACHE_FAILED_ENDPOINT_RETRY_INTERVAL, 60 );

init( GET_RANGE_SHARD_LIMIT, 2 );
init( WARM_RANGE_SHARD_LIMIT, 100 );

@@ -86,6 +86,8 @@ public:
// When locationCache in DatabaseContext gets to be this size, items will be evicted
int LOCATION_CACHE_EVICTION_SIZE;
int LOCATION_CACHE_EVICTION_SIZE_SIM;
+double LOCATION_CACHE_ENDPOINT_FAILURE_GRACE_PERIOD;
+double LOCATION_CACHE_FAILED_ENDPOINT_RETRY_INTERVAL;

int GET_RANGE_SHARD_LIMIT;
int WARM_RANGE_SHARD_LIMIT;

@@ -27,7 +27,7 @@ ConfigKey ConfigKeyRef::decodeKey(KeyRef const& key) {
try {
tuple = Tuple::unpack(key);
} catch (Error& e) {
-TraceEvent(SevWarnAlways, "FailedToUnpackConfigKey").detail("Key", printable(key)).error(e);
+TraceEvent(SevWarnAlways, "FailedToUnpackConfigKey").error(e).detail("Key", printable(key));
throw invalid_config_db_key();
}
if (tuple.size() != 2) {
@@ -96,7 +96,7 @@ public:

struct ToStringFunc {
std::string operator()(int v) const { return format("int:%d", v); }
-std::string operator()(int64_t v) const { return format("int64_t:%ld", v); }
+std::string operator()(int64_t v) const { return format("int64_t:%lld", v); }
std::string operator()(bool v) const { return format("bool:%d", v); }
std::string operator()(ValueRef v) const { return "string:" + v.toString(); }
std::string operator()(double v) const { return format("double:%lf", v); }

@@ -58,13 +58,28 @@ struct ClientLeaderRegInterface {
// - There is no address present more than once
class ClusterConnectionString {
public:
+enum ConnectionStringStatus { RESOLVED, RESOLVING, UNRESOLVED };
+
ClusterConnectionString() {}
ClusterConnectionString(const std::string& connStr);
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);

+ClusterConnectionString(const ClusterConnectionString& rhs) { operator=(rhs); }
+ClusterConnectionString& operator=(const ClusterConnectionString& rhs) {
+// Copy everything except AsyncTrigger resolveFinish.
+status = rhs.status;
+coords = rhs.coords;
+hostnames = rhs.hostnames;
+networkAddressToHostname = rhs.networkAddressToHostname;
+key = rhs.key;
+keyDesc = rhs.keyDesc;
+connectionString = rhs.connectionString;
+return *this;
+}

std::vector<NetworkAddress> const& coordinators() const { return coords; }
-void addResolved(Hostname hostname, NetworkAddress address) {
+void addResolved(const Hostname& hostname, const NetworkAddress& address) {
coords.push_back(address);
networkAddressToHostname.emplace(address, hostname);
}
@@ -78,16 +93,20 @@ public:
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.
void resolveHostnamesBlocking();
-void resetToUnresolved();
+// This function derives the member connectionString from the current key, coordinators and hostnames.
+void resetConnectionString();

-bool hasUnresolvedHostnames = false;
+void resetToUnresolved();
+void parseKey(const std::string& key);

+ConnectionStringStatus status = RESOLVED;
+AsyncTrigger resolveFinish;
std::vector<NetworkAddress> coords;
std::vector<Hostname> hostnames;
+std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;

private:
void parseConnString();
-void parseKey(const std::string& key);
-std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;
Key key, keyDesc;
std::string connectionString;
};
@@ -139,7 +158,7 @@ public:
// Signals to the connection record that it was successfully used to connect to a cluster.
void notifyConnected();

-bool hasUnresolvedHostnames() const;
+ClusterConnectionString::ConnectionStringStatus connectionStringStatus() const;
Future<Void> resolveHostnames();
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.

@@ -2142,7 +2142,7 @@ struct StartFullBackupTaskFunc : TaskFuncBase {
wait(tr->commit());
break;
} catch (Error& e) {
-TraceEvent("SetDestUidOrBeginVersionError").error(e, true);
+TraceEvent("SetDestUidOrBeginVersionError").errorUnsuppressed(e);
wait(tr->onError(e));
}
}
@@ -2907,7 +2907,7 @@ public:
TraceEvent("DBA_Abort").detail("CommitVersion", tr->getCommittedVersion());
break;
} catch (Error& e) {
-TraceEvent("DBA_AbortError").error(e, true);
+TraceEvent("DBA_AbortError").errorUnsuppressed(e);
wait(tr->onError(e));
}
}

@@ -198,6 +198,11 @@ struct ChangeFeedData : ReferenceCounted<ChangeFeedData> {
ChangeFeedData() : notAtLatest(1) {}
};

+struct EndpointFailureInfo {
+double startTime = 0;
+double lastRefreshTime = 0;
+};
+
class DatabaseContext : public ReferenceCounted<DatabaseContext>, public FastAllocated<DatabaseContext>, NonCopyable {
public:
static DatabaseContext* allocateOnForeignThread() {
@@ -241,6 +246,14 @@ public:
void invalidateCache(const KeyRef&, Reverse isBackward = Reverse::False);
void invalidateCache(const KeyRangeRef&);

+// Records that `endpoint` is failed on a healthy server.
+void setFailedEndpointOnHealthyServer(const Endpoint& endpoint);
+
+// Updates `endpoint` refresh time if the `endpoint` is a failed endpoint. If not, this does nothing.
+void updateFailedEndpointRefreshTime(const Endpoint& endpoint);
+Optional<EndpointFailureInfo> getEndpointFailureInfo(const Endpoint& endpoint);
+void clearFailedEndpointOnHealthyServer(const Endpoint& endpoint);
+
bool sampleReadTags() const;
bool sampleOnCost(uint64_t cost) const;

@@ -394,6 +407,7 @@ public:
// Cache of location information
int locationCacheSize;
CoalescedKeyRangeMap<Reference<LocationInfo>> locationCache;
+std::unordered_map<Endpoint, EndpointFailureInfo> failedEndpointsOnHealthyServersInfo;

std::map<UID, StorageServerInfo*> server_interf;
std::map<UID, BlobWorkerInterface> blobWorker_interf; // blob workers don't change endpoints for the same ID

@@ -1204,10 +1204,12 @@ struct ReadBlobGranuleContext {
struct StorageMetadataType {
constexpr static FileIdentifier file_identifier = 732123;
// when the SS is initialized
-uint64_t createdTime; // comes from Platform::timer_int()
+uint64_t createdTime; // comes from currentTime()
StorageMetadataType() : createdTime(0) {}
StorageMetadataType(uint64_t t) : createdTime(t) {}

+static uint64_t currentTime() { return g_network->timer() * 1e9; }
+
// To change this serialization, ProtocolVersion::StorageMetadata must be updated, and downgrades need
// to be considered
template <class Ar>

@@ -87,7 +87,7 @@ std::string secondsToTimeFormat(int64_t seconds) {
else if (seconds >= 60)
return format("%.2f minute(s)", seconds / 60.0);
else
return format("%ld second(s)", seconds);
return format("%lld second(s)", seconds);
}

const Key FileBackupAgent::keyLastRestorable = LiteralStringRef("last_restorable");
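The secondsToTimeFormat fix above swaps %ld for %lld because the argument is an int64_t; on LLP64 platforms such as 64-bit Windows, long is only 32 bits, so %ld is the wrong width there. A short standalone illustration of two portable ways to print a 64-bit integer (not taken from the FoundationDB sources):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
	int64_t seconds = 90061; // 1 day, 1 hour, 1 minute, 1 second
	// "%ld" expects long, which may be narrower than int64_t; these two forms are portable:
	std::printf("%lld second(s)\n", static_cast<long long>(seconds)); // matches the %lld fix above
	std::printf("%" PRId64 " second(s)\n", seconds);                  // <cinttypes> width macro
	return 0;
}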
@@ -4407,9 +4407,9 @@ public:
break;
} catch (Error& e) {
TraceEvent(numTries > 50 ? SevError : SevInfo, "FastRestoreToolSubmitRestoreRequestsMayFail")
.error(e)
.detail("Reason", "DB is not properly locked")
.detail("ExpectedLockID", randomUID)
.error(e);
.detail("ExpectedLockID", randomUID);
numTries++;
wait(tr->onError(e));
}
@@ -4443,8 +4443,8 @@ public:
break;
} catch (Error& e) {
TraceEvent(numTries > 50 ? SevError : SevInfo, "FastRestoreToolSubmitRestoreRequestsRetry")
.detail("RestoreIndex", restoreIndex)
.error(e);
.error(e)
.detail("RestoreIndex", restoreIndex);
numTries++;
wait(tr->onError(e));
}
@@ -5183,7 +5183,7 @@ public:
else
statusText += "The initial snapshot is still running.\n";

statusText += format("\nDetails:\n LogBytes written - %ld\n RangeBytes written - %ld\n "
statusText += format("\nDetails:\n LogBytes written - %lld\n RangeBytes written - %lld\n "
"Last complete log version and timestamp - %s, %s\n "
"Last complete snapshot version and timestamp - %s, %s\n "
"Current Snapshot start version and timestamp - %s, %s\n "
@@ -5800,9 +5800,9 @@ ACTOR static Future<Void> transformDatabaseContents(Database cx,
break;
} catch (Error& e) {
TraceEvent("FastRestoreWorkloadTransformDatabaseContentsGetAllKeys")
.error(e)
.detail("Index", i)
.detail("RestoreRange", restoreRanges[i])
.error(e);
.detail("RestoreRange", restoreRanges[i]);
oldData = Standalone<VectorRef<KeyValueRef>>(); // clear the vector
wait(tr.onError(e));
}
@@ -448,6 +448,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
err = http_bad_request_id();

TraceEvent(SevError, "HTTPRequestFailedIDMismatch")
.error(err.get())
.detail("DebugID", conn->getDebugID())
.detail("RemoteAddress", conn->getPeerAddress())
.detail("Verb", verb)
@@ -456,8 +457,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
.detail("ResponseCode", r->code)
.detail("ResponseContentLen", r->contentLen)
.detail("RequestIDSent", requestID)
.detail("RequestIDReceived", responseID)
.error(err.get());
.detail("RequestIDReceived", responseID);
}
}

@@ -501,7 +501,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
contentLen,
total_sent);
}
event.error(e);
event.errorUnsuppressed(e);
throw;
}
}
@@ -169,7 +169,7 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
} else if (value == "gradual") {
type = StorageMigrationType::GRADUAL;
} else {
printf("Error: Only disabled|aggressive|gradual are valid for storage_migration_mode.\n");
printf("Error: Only disabled|aggressive|gradual are valid for storage_migration_type.\n");
return out;
}
out[p + key] = format("%d", type);
@@ -772,7 +772,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {

ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change,
std::vector<NetworkAddress>* desiredCoordinators) {
ClusterConnectionString* conn) {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
@@ -783,44 +783,47 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?

state ClusterConnectionString old(currentKey.get().toString());
wait(old.resolveHostnames());
if (tr->getDatabase()->getConnectionRecord() &&
old.clusterKeyName().toString() !=
tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??

state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!desiredCoordinators->size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
if (!conn->coords.size()) {
std::vector<NetworkAddress> desiredCoordinatorAddresses = wait(change->getDesiredCoordinators(
tr,
old.coordinators(),
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
*desiredCoordinators = _desiredCoordinators;
conn->coords = desiredCoordinatorAddresses;
}

if (result != CoordinatorsResult::SUCCESS)
return result;

if (!desiredCoordinators->size())
if (!conn->coordinators().size())
return CoordinatorsResult::INVALID_NETWORK_ADDRESSES;

std::sort(desiredCoordinators->begin(), desiredCoordinators->end());
std::sort(conn->coords.begin(), conn->coords.end());
std::sort(conn->hostnames.begin(), conn->hostnames.end());

std::string newName = change->getDesiredClusterKeyName();
if (newName.empty())
newName = old.clusterKeyName().toString();

if (old.coordinators() == *desiredCoordinators && old.clusterKeyName() == newName)
if (old.coordinators() == conn->coordinators() && old.clusterKeyName() == newName)
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;

state ClusterConnectionString conn(*desiredCoordinators,
StringRef(newName + ':' + deterministicRandom()->randomAlphaNumeric(32)));
std::string key(newName + ':' + deterministicRandom()->randomAlphaNumeric(32));
conn->parseKey(key);
conn->resetConnectionString();

if (g_network->isSimulated()) {
int i = 0;
int protectedCount = 0;
while ((protectedCount < ((desiredCoordinators->size() / 2) + 1)) && (i < desiredCoordinators->size())) {
auto process = g_simulator.getProcessByAddress((*desiredCoordinators)[i]);
while ((protectedCount < ((conn->coordinators().size() / 2) + 1)) && (i < conn->coordinators().size())) {
auto process = g_simulator.getProcessByAddress(conn->coordinators()[i]);
auto addresses = process->addresses;

if (!process->isReliable()) {
@@ -832,14 +835,14 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
if (addresses.secondaryAddress.present()) {
g_simulator.protectedAddresses.insert(process->addresses.secondaryAddress.get());
}
TraceEvent("ProtectCoordinator").detail("Address", (*desiredCoordinators)[i]).backtrace();
TraceEvent("ProtectCoordinator").detail("Address", conn->coordinators()[i]).backtrace();
protectedCount++;
i++;
}
}

std::vector<Future<Optional<LeaderInfo>>> leaderServers;
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(conn)));
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(*conn)));

leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
@@ -851,7 +854,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
when(wait(waitForAll(leaderServers))) {}
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
}
tr->set(coordinatorsKey, conn.toString());
tr->set(coordinatorsKey, conn->toString());
return Optional<CoordinatorsResult>();
}
@@ -1273,7 +1276,7 @@ ACTOR Future<Void> excludeServers(Database cx, std::vector<AddressExclusion> ser
wait(ryw.commit());
return Void();
} catch (Error& e) {
TraceEvent("ExcludeServersError").error(e, true);
TraceEvent("ExcludeServersError").errorUnsuppressed(e);
wait(ryw.onError(e));
}
}
@@ -1285,7 +1288,7 @@ ACTOR Future<Void> excludeServers(Database cx, std::vector<AddressExclusion> ser
wait(tr.commit());
return Void();
} catch (Error& e) {
TraceEvent("ExcludeServersError").error(e, true);
TraceEvent("ExcludeServersError").errorUnsuppressed(e);
wait(tr.onError(e));
}
}
@@ -1336,7 +1339,7 @@ ACTOR Future<Void> excludeLocalities(Database cx, std::unordered_set<std::string
wait(ryw.commit());
return Void();
} catch (Error& e) {
TraceEvent("ExcludeLocalitiesError").error(e, true);
TraceEvent("ExcludeLocalitiesError").errorUnsuppressed(e);
wait(ryw.onError(e));
}
}
@@ -1348,7 +1351,7 @@ ACTOR Future<Void> excludeLocalities(Database cx, std::unordered_set<std::string
wait(tr.commit());
return Void();
} catch (Error& e) {
TraceEvent("ExcludeLocalitiesError").error(e, true);
TraceEvent("ExcludeLocalitiesError").errorUnsuppressed(e);
wait(tr.onError(e));
}
}
@@ -1392,7 +1395,7 @@ ACTOR Future<Void> includeServers(Database cx, std::vector<AddressExclusion> ser
wait(ryw.commit());
return Void();
} catch (Error& e) {
TraceEvent("IncludeServersError").error(e, true);
TraceEvent("IncludeServersError").errorUnsuppressed(e);
wait(ryw.onError(e));
}
}
@@ -1449,7 +1452,7 @@ ACTOR Future<Void> includeServers(Database cx, std::vector<AddressExclusion> ser
wait(tr.commit());
return Void();
} catch (Error& e) {
TraceEvent("IncludeServersError").error(e, true);
TraceEvent("IncludeServersError").errorUnsuppressed(e);
wait(tr.onError(e));
}
}
@@ -1487,7 +1490,7 @@ ACTOR Future<Void> includeLocalities(Database cx, std::vector<std::string> local
wait(ryw.commit());
return Void();
} catch (Error& e) {
TraceEvent("IncludeLocalitiesError").error(e, true);
TraceEvent("IncludeLocalitiesError").errorUnsuppressed(e);
wait(ryw.onError(e));
}
}
@@ -1535,7 +1538,7 @@ ACTOR Future<Void> includeLocalities(Database cx, std::vector<std::string> local
wait(tr.commit());
return Void();
} catch (Error& e) {
TraceEvent("IncludeLocalitiesError").error(e, true);
TraceEvent("IncludeLocalitiesError").errorUnsuppressed(e);
wait(tr.onError(e));
}
}
@@ -1907,7 +1910,7 @@ ACTOR Future<Void> mgmtSnapCreate(Database cx, Standalone<StringRef> snapCmd, UI
TraceEvent("SnapCreateSucceeded").detail("snapUID", snapUID);
return Void();
} catch (Error& e) {
TraceEvent(SevWarn, "SnapCreateFailed").detail("snapUID", snapUID).error(e);
TraceEvent(SevWarn, "SnapCreateFailed").error(e).detail("snapUID", snapUID);
throw;
}
}
@@ -2198,7 +2201,7 @@ ACTOR Future<Void> advanceVersion(Database cx, Version v) {
tr.set(minRequiredCommitVersionKey, BinaryWriter::toValue(v + 1, Unversioned()));
wait(tr.commit());
} else {
printf("Current read version is %ld\n", rv);
fmt::print("Current read version is {}\n", rv);
return Void();
}
} catch (Error& e) {
@@ -56,7 +56,7 @@ struct IQuorumChange : ReferenceCounted<IQuorumChange> {
// Change to use the given set of coordination servers
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change,
std::vector<NetworkAddress>* desiredCoordinators);
ClusterConnectionString* conn);
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
Reference<IQuorumChange> autoQuorumChange(int desired = -1);
Reference<IQuorumChange> noQuorumChange();
@@ -77,8 +77,8 @@ void IClusterConnectionRecord::setPersisted() {
connectionStringNeedsPersisted = false;
}

bool IClusterConnectionRecord::hasUnresolvedHostnames() const {
return cs.hasUnresolvedHostnames;
ClusterConnectionString::ConnectionStringStatus IClusterConnectionRecord::connectionStringStatus() const {
return cs.status;
}

Future<Void> IClusterConnectionRecord::resolveHostnames() {
@@ -98,39 +98,56 @@ std::string ClusterConnectionString::getErrorString(std::string const& source, E
}

ACTOR Future<Void> resolveHostnamesImpl(ClusterConnectionString* self) {
std::vector<Future<Void>> fs;
for (auto const& hostName : self->hostnames) {
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostName.host, hostName.service),
[=](std::vector<NetworkAddress> const& addresses) -> Void {
NetworkAddress addr = addresses[deterministicRandom()->randomInt(0, addresses.size())];
addr.flags = 0; // Reset the parsed address to public
addr.fromHostname = NetworkAddressFromHostname::True;
if (hostName.isTLS) {
addr.flags |= NetworkAddress::FLAG_TLS;
}
self->addResolved(hostName, addr);
return Void();
}));
loop {
if (self->status == ClusterConnectionString::UNRESOLVED) {
self->status = ClusterConnectionString::RESOLVING;
std::vector<Future<Void>> fs;
for (auto const& hostname : self->hostnames) {
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostname.host, hostname.service),
[=](std::vector<NetworkAddress> const& addresses) -> Void {
NetworkAddress address =
addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
self->addResolved(hostname, address);
return Void();
}));
}
wait(waitForAll(fs));
std::sort(self->coords.begin(), self->coords.end());
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
self->status = ClusterConnectionString::UNRESOLVED;
self->resolveFinish.trigger();
throw connection_string_invalid();
}
self->status = ClusterConnectionString::RESOLVED;
self->resolveFinish.trigger();
break;
} else if (self->status == ClusterConnectionString::RESOLVING) {
wait(self->resolveFinish.onTrigger());
if (self->status == ClusterConnectionString::RESOLVED) {
break;
}
// Otherwise, this means other threads failed on resolve, so here we go back to the loop and try to resolve
// again.
} else {
// status is RESOLVED, nothing to do.
break;
}
}
wait(waitForAll(fs));
std::sort(self->coords.begin(), self->coords.end());
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
throw connection_string_invalid();
}
self->hasUnresolvedHostnames = false;
return Void();
}

Future<Void> ClusterConnectionString::resolveHostnames() {
if (!hasUnresolvedHostnames) {
return Void();
} else {
return resolveHostnamesImpl(this);
}
return resolveHostnamesImpl(this);
}

void ClusterConnectionString::resolveHostnamesBlocking() {
if (hasUnresolvedHostnames) {
if (status != RESOLVED) {
status = RESOLVING;
for (auto const& hostname : hostnames) {
std::vector<NetworkAddress> addresses =
INetworkConnections::net()->resolveTCPEndpointBlocking(hostname.host, hostname.service);
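The rewritten resolveHostnamesImpl above replaces the old hasUnresolvedHostnames flag with a three-state status (UNRESOLVED, RESOLVING, RESOLVED) plus a resolveFinish trigger, so that concurrent callers either claim the DNS work themselves or wait for the in-flight attempt and then re-check its outcome. A minimal standalone sketch of that coordination pattern, using std::condition_variable in place of the flow AsyncTrigger (the class, names, and doResolve placeholder below are illustrative only, not FoundationDB code):

#include <condition_variable>
#include <mutex>
#include <stdexcept>

enum class Status { UNRESOLVED, RESOLVING, RESOLVED };

class Resolver {
	std::mutex m;
	std::condition_variable finished;
	Status status = Status::UNRESOLVED;

	bool doResolve() { return true; } // placeholder for the real DNS lookups

public:
	void resolve() {
		std::unique_lock<std::mutex> lk(m);
		for (;;) {
			if (status == Status::UNRESOLVED) {
				status = Status::RESOLVING; // claim the work
				lk.unlock();
				bool ok = doResolve();
				lk.lock();
				status = ok ? Status::RESOLVED : Status::UNRESOLVED;
				finished.notify_all(); // wake anyone waiting on this attempt
				if (!ok)
					throw std::runtime_error("connection_string_invalid");
				return;
			} else if (status == Status::RESOLVING) {
				// Someone else is resolving; wait, then re-check whether it succeeded.
				finished.wait(lk);
			} else {
				return; // already RESOLVED, nothing to do
			}
		}
	}
};

As in the diff, a failed attempt drops the state back to UNRESOLVED before notifying, so a waiter that wakes up simply loops around and retries the resolution itself.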
@@ -140,14 +157,14 @@ void ClusterConnectionString::resolveHostnamesBlocking() {
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
coords.push_back(address);
networkAddressToHostname.emplace(address, hostname);
addResolved(hostname, address);
}
std::sort(coords.begin(), coords.end());
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
status = UNRESOLVED;
throw connection_string_invalid();
}
hasUnresolvedHostnames = false;
status = RESOLVED;
}
}

@@ -156,11 +173,15 @@ void ClusterConnectionString::resetToUnresolved() {
coords.clear();
hostnames.clear();
networkAddressToHostname.clear();
hasUnresolvedHostnames = true;
status = UNRESOLVED;
parseConnString();
}
}

void ClusterConnectionString::resetConnectionString() {
connectionString = toString();
}

void ClusterConnectionString::parseConnString() {
// Split on '@' into key@addrs
int pAt = connectionString.find_first_of('@');
@@ -184,7 +205,9 @@ void ClusterConnectionString::parseConnString() {
}
p = pComma + 1;
}
hasUnresolvedHostnames = hostnames.size() > 0;
if (hostnames.size() > 0) {
status = UNRESOLVED;
}
ASSERT((coords.size() + hostnames.size()) > 0);

std::sort(coords.begin(), coords.end());
@@ -256,7 +279,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
{
input = "asdf:2345@localhost:1234";
ClusterConnectionString cs(input);
ASSERT(cs.hasUnresolvedHostnames);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 1);
ASSERT(input == cs.toString());
}
@@ -264,7 +287,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
{
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
ClusterConnectionString cs(input);
ASSERT(cs.hasUnresolvedHostnames);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@@ -277,7 +300,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##";

ClusterConnectionString cs(commented);
ASSERT(cs.hasUnresolvedHostnames);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@@ -290,7 +313,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##";

ClusterConnectionString cs(commented);
ASSERT(cs.hasUnresolvedHostnames);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@@ -314,16 +337,16 @@ TEST_CASE("/fdbclient/MonitorLeader/ConnectionString") {
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address2 });

state ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
ASSERT(cs.hasUnresolvedHostnames);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
wait(cs.resolveHostnames());
ASSERT(!cs.hasUnresolvedHostnames);
ASSERT(cs.status == ClusterConnectionString::RESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 2);
ASSERT(cs.toString() == connectionString);
cs.resetToUnresolved();
ASSERT(cs.hasUnresolvedHostnames);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
ASSERT(cs.toString() == connectionString);
@@ -422,29 +445,17 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/fuzz") {
}

ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
: coords(servers) {
: status(RESOLVED), coords(servers) {
std::string keyString = key.toString();
parseKey(keyString);
connectionString = keyString + "@";
for (int i = 0; i < coords.size(); i++) {
if (i) {
connectionString += ',';
}
connectionString += coords[i].toString();
}
resetConnectionString();
}

ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key)
: hasUnresolvedHostnames(true), hostnames(hosts) {
: status(UNRESOLVED), hostnames(hosts) {
std::string keyString = key.toString();
parseKey(keyString);
connectionString = keyString + "@";
for (int i = 0; i < hostnames.size(); i++) {
if (i) {
connectionString += ',';
}
connectionString += hostnames[i].toString();
}
resetConnectionString();
}

void ClusterConnectionString::parseKey(const std::string& key) {
@@ -497,6 +508,7 @@ std::string ClusterConnectionString::toString() const {
}

ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
@@ -525,15 +537,44 @@ ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {

// Nominee is the worker among all workers that are considered as leader by one coordinator
// This function contacts a coordinator coord to ask who is its nominee.
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
// to throw `coordinators_changed()` error
ACTOR Future<Void> monitorNominee(Key key,
ClientLeaderRegInterface coord,
AsyncTrigger* nomineeChange,
Optional<LeaderInfo>* info) {
Optional<LeaderInfo>* info,
Optional<Hostname> hostname = Optional<Hostname>()) {
loop {
state Optional<LeaderInfo> li =
wait(retryBrokenPromise(coord.getLeader,
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply));
state Optional<LeaderInfo> li;

if (coord.getLeader.getEndpoint().getPrimaryAddress().fromHostname) {
state ErrorOr<Optional<LeaderInfo>> rep =
wait(coord.getLeader.tryGetReply(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply));
if (rep.isError()) {
// Connecting to nominee failed, most likely due to connection failed.
TraceEvent("MonitorNomineeError")
.error(rep.getError())
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
.detail("OldAddr", coord.getLeader.getEndpoint().getPrimaryAddress().toString());
if (rep.getError().code() == error_code_request_maybe_delivered) {
// 50 milliseconds delay to prevent tight resolving loop due to outdated DNS cache
wait(delay(0.05));
throw coordinators_changed();
} else {
throw rep.getError();
}
} else if (rep.present()) {
li = rep.get();
}
} else {
Optional<LeaderInfo> tmp =
wait(retryBrokenPromise(coord.getLeader,
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply));
li = tmp;
}

wait(Future<Void>(Void())); // Make sure we weren't cancelled

TraceEvent("GetLeaderReply")
@@ -608,53 +649,74 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
MonitorLeaderInfo info) {
state ClientCoordinators coordinators(info.intermediateConnRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state Future<Void> allActors;

nominees.resize(coordinators.clientLeaderServers.size());

std::vector<Future<Void>> actors;
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
actors.reserve(coordinators.clientLeaderServers.size());
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++)
actors.push_back(
monitorNominee(coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i]));
allActors = waitForAll(actors);

loop {
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
TraceEvent("MonitorLeaderChange")
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
if (leader.present()) {
if (leader.get().first.forward) {
TraceEvent("MonitorLeaderForwarding")
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
.trackLatest("MonitorLeaderForwarding");
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
return info;
}
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
wait(connRecord->resolveHostnames());
wait(info.intermediateConnRecord->resolveHostnames());
state ClientCoordinators coordinators(info.intermediateConnRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state Future<Void> allActors;

info.hasConnected = true;
connRecord->notifyConnected();
nominees.resize(coordinators.clientLeaderServers.size());

outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
state std::vector<Future<Void>> actors;
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
actors.reserve(coordinators.clientLeaderServers.size());
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
Optional<Hostname> hostname;
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
coordinators.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress());
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
hostname = r->second;
}
actors.push_back(monitorNominee(
coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i], hostname));
}
allActors = waitForAll(actors);

loop {
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
TraceEvent("MonitorLeaderChange")
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
if (leader.present()) {
if (leader.get().first.forward) {
TraceEvent("MonitorLeaderForwarding")
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
.trackLatest("MonitorLeaderForwarding");
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
return info;
}
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}

info.hasConnected = true;
connRecord->notifyConnected();

outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
}
try {
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorLeaderCoordinatorsChanged").suppressFor(1.0);
connRecord->getConnectionString().resetToUnresolved();
break;
} else {
throw e;
}
}
}
wait(nomineeChange.onTrigger() || allActors);
}
}
@@ -774,8 +836,8 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
when(ClientDBInfo ni =
wait(brokenPromiseToNever(knownLeader->get().get().clientInterface.openDatabase.getReply(req)))) {
TraceEvent("GetClientInfoFromLeaderGotClientInfo", knownLeader->get().get().clientInterface.id())
.detail("CommitProxy0", ni.commitProxies.size() ? ni.commitProxies[0].id() : UID())
.detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].id() : UID())
.detail("CommitProxy0", ni.commitProxies.size() ? ni.commitProxies[0].address().toString() : "")
.detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].address().toString() : "")
.detail("ClientID", ni.id);
clientData->clientInfo->set(CachedSerialization<ClientDBInfo>(ni));
}
@@ -787,7 +849,8 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
std::vector<NetworkAddress> coordinators,
ClientData* clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo) {
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo,
Reference<AsyncVar<Void>> coordinatorsChanged) {
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
@@ -835,7 +898,14 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
leaderInfo->set(leader.get().first);
}
}
wait(nomineeChange.onTrigger() || allActors);
try {
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
coordinatorsChanged->trigger();
}
throw e;
}
}
}
@@ -964,9 +1034,15 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
successIndex = index;
} else {
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
if (rep.getError().code() == error_code_coordinators_changed) {
throw coordinators_changed();
}
index = (index + 1) % addrs.size();
if (index == successIndex) {
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
// When the client fails talking to all coordinators, we throw coordinators_changed() and let the caller
// re-resolve the connection string and retry.
throw coordinators_changed();
}
}
}
@@ -978,16 +1054,27 @@ ACTOR Future<Void> monitorProxies(
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) {
wait(connRecord->get()->resolveHostnames());
state MonitorLeaderInfo info(connRecord->get());
loop {
choose {
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
info = _info;
try {
wait(info.intermediateConnRecord->resolveHostnames());
choose {
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
info = _info;
}
when(wait(connRecord->onChange())) {
info.hasConnected = false;
info.intermediateConnRecord = connRecord->get();
}
}
when(wait(connRecord->onChange())) {
info.hasConnected = false;
info.intermediateConnRecord = connRecord->get();
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorProxiesCoordinatorsChanged").suppressFor(1.0);
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
} else {
throw e;
}
}
}
@@ -74,10 +74,11 @@ Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connFile,
// This is one place where the leader election algorithm is run. The coodinator contacts all coodinators to collect
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
// also monitors the change of the leader.
Future<Void> monitorLeaderAndGetClientInfo(Value const& key,
Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey,
std::vector<NetworkAddress> const& coordinators,
ClientData* const& clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo);
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo,
Reference<AsyncVar<Void>> const& coordinatorsChanged);

Future<Void> monitorProxies(
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,
@@ -1202,9 +1202,9 @@ MultiVersionDatabase::MultiVersionDatabase(MultiVersionApi* api,
// but we may not see trace logs from this client until a successful connection
// is established.
TraceEvent(SevWarnAlways, "FailedToInitializeExternalClient")
.error(e)
.detail("LibraryPath", client->libPath)
.detail("ClusterFilePath", clusterFilePath)
.error(e);
.detail("ClusterFilePath", clusterFilePath);
}
}
});
@@ -1218,9 +1218,9 @@ MultiVersionDatabase::MultiVersionDatabase(MultiVersionApi* api,
} catch (Error& e) {
// This connection is discarded
TraceEvent(SevWarnAlways, "FailedToCreateLegacyDatabaseConnection")
.error(e)
.detail("LibraryPath", client->libPath)
.detail("ClusterFilePath", clusterFilePath)
.error(e);
.detail("ClusterFilePath", clusterFilePath);
}
}
});
@@ -1360,8 +1360,8 @@ ThreadFuture<Void> MultiVersionDatabase::DatabaseState::monitorProtocolVersion()
}

TraceEvent("ErrorGettingClusterProtocolVersion")
.detail("ExpectedProtocolVersion", expected)
.error(cv.getError());
.error(cv.getError())
.detail("ExpectedProtocolVersion", expected);
}

ProtocolVersion clusterVersion =
@@ -1409,10 +1409,10 @@ void MultiVersionDatabase::DatabaseState::protocolVersionChanged(ProtocolVersion
newDb = client->api->createDatabase(clusterFilePath.c_str());
} catch (Error& e) {
TraceEvent(SevWarnAlways, "MultiVersionClientFailedToCreateDatabase")
.error(e)
.detail("LibraryPath", client->libPath)
.detail("External", client->external)
.detail("ClusterFilePath", clusterFilePath)
.error(e);
.detail("ClusterFilePath", clusterFilePath);

// Put the client in a disconnected state until the version changes again
updateDatabase(Reference<IDatabase>(), Reference<ClientInfo>());
@@ -1486,8 +1486,8 @@ void MultiVersionDatabase::DatabaseState::updateDatabase(Reference<IDatabase> ne
// We can't create a new database to monitor the cluster version. This means we will continue using the
// previous one, which should hopefully continue to work.
TraceEvent(SevWarnAlways, "FailedToCreateDatabaseForVersionMonitoring")
.detail("ClusterFilePath", clusterFilePath)
.error(e);
.error(e)
.detail("ClusterFilePath", clusterFilePath);
}
}
} else {
@@ -1499,8 +1499,8 @@ void MultiVersionDatabase::DatabaseState::updateDatabase(Reference<IDatabase> ne
// We can't create a new database to monitor the cluster version. This means we will continue using the
// previous one, which should hopefully continue to work.
TraceEvent(SevWarnAlways, "FailedToCreateDatabaseForVersionMonitoring")
.detail("ClusterFilePath", clusterFilePath)
.error(e);
.error(e)
.detail("ClusterFilePath", clusterFilePath);
}
}
@@ -732,16 +732,18 @@ Future<Void> attemptGRVFromOldProxies(std::vector<GrvProxyInterface> oldProxies,

ACTOR static Future<Void> monitorClientDBInfoChange(DatabaseContext* cx,
Reference<AsyncVar<ClientDBInfo> const> clientDBInfo,
AsyncTrigger* proxyChangeTrigger) {
AsyncTrigger* proxiesChangeTrigger) {
state std::vector<CommitProxyInterface> curCommitProxies;
state std::vector<GrvProxyInterface> curGrvProxies;
state ActorCollection actors(false);
state Future<Void> clientDBInfoOnChange = clientDBInfo->onChange();
curCommitProxies = clientDBInfo->get().commitProxies;
curGrvProxies = clientDBInfo->get().grvProxies;

loop {
choose {
when(wait(clientDBInfo->onChange())) {
when(wait(clientDBInfoOnChange)) {
clientDBInfoOnChange = clientDBInfo->onChange();
if (clientDBInfo->get().commitProxies != curCommitProxies ||
clientDBInfo->get().grvProxies != curGrvProxies) {
// This condition is a bit complicated. Here we want to verify that we're unable to receive a read
@@ -758,7 +760,7 @@ ACTOR static Future<Void> monitorClientDBInfoChange(DatabaseContext* cx,
}
curCommitProxies = clientDBInfo->get().commitProxies;
curGrvProxies = clientDBInfo->get().grvProxies;
proxyChangeTrigger->trigger();
proxiesChangeTrigger->trigger();
}
}
when(wait(actors.getResult())) { UNSTOPPABLE_ASSERT(false); }
@@ -1596,6 +1598,32 @@ void DatabaseContext::invalidateCache(const KeyRangeRef& keys) {
locationCache.insert(KeyRangeRef(begin, end), Reference<LocationInfo>());
}

void DatabaseContext::setFailedEndpointOnHealthyServer(const Endpoint& endpoint) {
if (failedEndpointsOnHealthyServersInfo.find(endpoint) == failedEndpointsOnHealthyServersInfo.end()) {
failedEndpointsOnHealthyServersInfo[endpoint] =
EndpointFailureInfo{ .startTime = now(), .lastRefreshTime = now() };
}
}

void DatabaseContext::updateFailedEndpointRefreshTime(const Endpoint& endpoint) {
if (failedEndpointsOnHealthyServersInfo.find(endpoint) == failedEndpointsOnHealthyServersInfo.end()) {
// The endpoint is not failed. Nothing to update.
return;
}
failedEndpointsOnHealthyServersInfo[endpoint].lastRefreshTime = now();
}

Optional<EndpointFailureInfo> DatabaseContext::getEndpointFailureInfo(const Endpoint& endpoint) {
if (failedEndpointsOnHealthyServersInfo.find(endpoint) == failedEndpointsOnHealthyServersInfo.end()) {
return Optional<EndpointFailureInfo>();
}
return failedEndpointsOnHealthyServersInfo[endpoint];
}

void DatabaseContext::clearFailedEndpointOnHealthyServer(const Endpoint& endpoint) {
failedEndpointsOnHealthyServersInfo.erase(endpoint);
}

Future<Void> DatabaseContext::onProxiesChanged() const {
return this->proxiesChangeTrigger.onTrigger();
}
@@ -2449,6 +2477,35 @@ ACTOR Future<std::pair<KeyRange, Reference<LocationInfo>>> getKeyLocation_intern
}
}

// Checks if `endpoint` is failed on a healthy server or not. Returns true if we need to refresh the location cache for
// the endpoint.
bool checkOnlyEndpointFailed(const Database& cx, const Endpoint& endpoint) {
if (IFailureMonitor::failureMonitor().onlyEndpointFailed(endpoint)) {
// This endpoint is failed, but the server is still healthy. There are two cases this can happen:
// - There is a recent bounce in the cluster where the endpoints in SSes get updated.
// - The SS is failed and terminated on a server, but the server is kept running.
// To account for the first case, we invalidate the cache and issue GetKeyLocation requests to the proxy to
// update the cache with the new SS points. However, if the failure is caused by the second case, the
// requested key location will continue to be the failed endpoint until the data movement is finished. But
// every read will generate a GetKeyLocation request to the proxies (and still getting the failed endpoint
// back), which may overload the proxy and affect data movement speed. Therefore, we only refresh the
// location cache for short period of time, and after the initial grace period that we keep retrying
// resolving key location, we will slow it down to resolve it only once every
// `LOCATION_CACHE_FAILED_ENDPOINT_RETRY_INTERVAL`.
cx->setFailedEndpointOnHealthyServer(endpoint);
const auto& failureInfo = cx->getEndpointFailureInfo(endpoint);
ASSERT(failureInfo.present());
if (now() - failureInfo.get().startTime < CLIENT_KNOBS->LOCATION_CACHE_ENDPOINT_FAILURE_GRACE_PERIOD ||
now() - failureInfo.get().lastRefreshTime > CLIENT_KNOBS->LOCATION_CACHE_FAILED_ENDPOINT_RETRY_INTERVAL) {
cx->updateFailedEndpointRefreshTime(endpoint);
return true;
}
} else {
cx->clearFailedEndpointOnHealthyServer(endpoint);
}
return false;
}

template <class F>
Future<std::pair<KeyRange, Reference<LocationInfo>>> getKeyLocation(Database const& cx,
Key const& key,
@@ -2463,14 +2520,19 @@ Future<std::pair<KeyRange, Reference<LocationInfo>>> getKeyLocation(Database con
return getKeyLocation_internal(cx, key, spanID, debugID, useProvisionalProxies, isBackward);
}

bool onlyEndpointFailedAndNeedRefresh = false;
for (int i = 0; i < ssi.second->size(); i++) {
if (IFailureMonitor::failureMonitor().onlyEndpointFailed(ssi.second->get(i, member).getEndpoint())) {
cx->invalidateCache(key);
ssi.second.clear();
return getKeyLocation_internal(cx, key, spanID, debugID, useProvisionalProxies, isBackward);
if (checkOnlyEndpointFailed(cx, ssi.second->get(i, member).getEndpoint())) {
onlyEndpointFailedAndNeedRefresh = true;
}
}

if (onlyEndpointFailedAndNeedRefresh) {
cx->invalidateCache(key);
// Refresh the cache with a new getKeyLocations made to proxies.
return getKeyLocation_internal(cx, key, spanID, debugID, useProvisionalProxies, isBackward);
}

return ssi;
}
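The checkOnlyEndpointFailed comment above describes the refresh policy: while an endpoint is failed but its server remains healthy, keep refreshing the location cache during an initial grace period, then back off to at most one refresh per retry interval. A standalone sketch of that decision, with illustrative constants standing in for CLIENT_KNOBS->LOCATION_CACHE_ENDPOINT_FAILURE_GRACE_PERIOD and LOCATION_CACHE_FAILED_ENDPOINT_RETRY_INTERVAL (the values below are made up):

#include <cstdio>

constexpr double GRACE_PERIOD = 5.0;    // keep refreshing for this long after the first failure
constexpr double RETRY_INTERVAL = 60.0; // afterwards refresh at most once per interval

struct FailureInfo {
	double startTime;       // when the endpoint was first seen failed
	double lastRefreshTime; // when the location cache was last refreshed for it
};

// Decide whether to refresh the location cache now, mirroring the condition above.
bool shouldRefresh(FailureInfo& info, double now) {
	if (now - info.startTime < GRACE_PERIOD || now - info.lastRefreshTime > RETRY_INTERVAL) {
		info.lastRefreshTime = now;
		return true;
	}
	return false;
}

int main() {
	FailureInfo info{ /*startTime=*/0.0, /*lastRefreshTime=*/0.0 };
	for (double t : { 1.0, 2.0, 10.0, 30.0, 70.0 })
		std::printf("t=%5.1f refresh=%d\n", t, shouldRefresh(info, t) ? 1 : 0);
	return 0;
}

With these sample constants the refresh fires at t=1 and t=2 (inside the grace period), is skipped at t=10 and t=30, and fires again at t=70 once the retry interval has elapsed.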
@@ -2553,21 +2615,21 @@ Future<std::vector<std::pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLoc

bool foundFailed = false;
for (const auto& [range, locInfo] : locations) {
bool onlyEndpointFailed = false;
bool onlyEndpointFailedAndNeedRefresh = false;
for (int i = 0; i < locInfo->size(); i++) {
if (IFailureMonitor::failureMonitor().onlyEndpointFailed(locInfo->get(i, member).getEndpoint())) {
onlyEndpointFailed = true;
break;
if (checkOnlyEndpointFailed(cx, locInfo->get(i, member).getEndpoint())) {
onlyEndpointFailedAndNeedRefresh = true;
}
}

if (onlyEndpointFailed) {
if (onlyEndpointFailedAndNeedRefresh) {
cx->invalidateCache(range.begin);
foundFailed = true;
}
}

if (foundFailed) {
// Refresh the cache with a new getKeyRangeLocations made to proxies.
return getKeyRangeLocations_internal(cx, keys, limit, reverse, spanID, debugID, useProvisionalProxies);
}

@@ -5095,7 +5157,7 @@ ACTOR static Future<Void> commitDummyTransaction(Reference<TransactionState> trS
return Void();
} catch (Error& e) {
TraceEvent("CommitDummyTransactionError")
.error(e, true)
.errorUnsuppressed(e)
.detail("Key", range.begin)
.detail("Retries", retries);
wait(tr.onError(e));
@@ -5713,9 +5775,10 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan,
loop {
try {
state GetReadVersionRequest req(span.context, transactionCount, priority, flags, tags, debugID);
state Future<Void> onProxiesChanged = cx->onProxiesChanged();

choose {
when(wait(cx->onProxiesChanged())) {}
when(wait(onProxiesChanged)) { onProxiesChanged = cx->onProxiesChanged(); }
when(GetReadVersionReply v =
wait(basicLoadBalance(cx->getGrvProxies(UseProvisionalProxies(
flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES)),
@@ -6846,7 +6909,7 @@ ACTOR Future<Void> snapCreate(Database cx, Standalone<StringRef> snapCmd, UID sn
}
}
} catch (Error& e) {
TraceEvent("SnapCreateError").detail("SnapCmd", snapCmd.toString()).detail("UID", snapUID).error(e);
TraceEvent("SnapCreateError").error(e).detail("SnapCmd", snapCmd.toString()).detail("UID", snapUID);
throw;
}
}
@@ -6874,13 +6937,14 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled) {
TraceEvent("ExclusionSafetyCheckError")
.error(e)
.detail("NumExclusion", exclusions.size())
.detail("Exclusions", describe(exclusions))
.error(e);
.detail("Exclusions", describe(exclusions));
}
throw;
}
TraceEvent("ExclusionSafetyCheckCoordinators").log();
wait(cx->getConnectionRecord()->resolveHostnames());
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
@@ -2585,7 +2585,7 @@ void ReadYourWritesTransaction::debugLogRetries(Optional<Error> error) {
{
TraceEvent trace = TraceEvent("LongTransaction");
if (error.present())
trace.error(error.get(), true);
trace.errorUnsuppressed(error.get());
if (!transactionDebugInfo->transactionName.empty())
trace.detail("TransactionName", transactionDebugInfo->transactionName);
trace.detail("Elapsed", elapsed).detail("Retries", retries).detail("Committed", committed);
@@ -500,7 +500,7 @@ ACTOR Future<Optional<json_spirit::mObject>> tryReadJSONFile(std::string path) {

} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)
TraceEvent(SevWarn, errorEventType).error(e).suppressFor(60).detail("File", path);
TraceEvent(SevWarn, errorEventType).errorUnsuppressed(e).suppressFor(60).detail("File", path);
}

return Optional<json_spirit::mObject>();
@@ -744,7 +744,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<S3BlobStoreEndp

// Attach err to trace event if present, otherwise extract some stuff from the response
if (err.present()) {
event.error(err.get());
event.errorUnsuppressed(err.get());
}
event.suppressFor(60);
if (!err.present()) {
@@ -954,7 +954,7 @@ ACTOR Future<Void> listObjectsStream_impl(Reference<S3BlobStoreEndpoint> bstore,
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)
TraceEvent(SevWarn, "S3BlobStoreEndpointListResultParseError")
.error(e)
.errorUnsuppressed(e)
.suppressFor(60)
.detail("Resource", fullResource);
throw http_bad_response();
@@ -1080,7 +1080,7 @@ ACTOR Future<std::vector<std::string>> listBuckets_impl(Reference<S3BlobStoreEnd
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)
TraceEvent(SevWarn, "S3BlobStoreEndpointListBucketResultParseError")
.error(e)
.errorUnsuppressed(e)
.suppressFor(60)
.detail("Resource", fullResource);
throw http_bad_response();
@@ -103,6 +103,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( TLOG_POP_BATCH_SIZE, 1000 ); if ( randomize && BUGGIFY ) TLOG_POP_BATCH_SIZE = 10;
init( TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE, 250e6 );
init( ENABLE_DETAILED_TLOG_POP_TRACE, false ); if ( randomize && BUGGIFY ) ENABLE_DETAILED_TLOG_POP_TRACE = true;
init( PEEK_BATCHING_EMPTY_MSG, false ); if ( randomize && BUGGIFY ) PEEK_BATCHING_EMPTY_MSG = true;
init( PEEK_BATCHING_EMPTY_MSG_INTERVAL, 0.001 ); if ( randomize && BUGGIFY ) PEEK_BATCHING_EMPTY_MSG_INTERVAL = 0.01;

// disk snapshot max timeout, to be put in TLog, storage and coordinator nodes
init( MAX_FORKED_PROCESS_OUTPUT, 1024 );
@@ -362,7 +364,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC, 0 );
// If true, enables dynamic adjustment of ROCKSDB_WRITE_RATE_LIMITER_BYTES according to the recent demand of background IO.
init( ROCKSDB_WRITE_RATE_LIMITER_AUTO_TUNE, true );

init( ROCKSDB_PERFCONTEXT_ENABLE, false ); if( randomize && BUGGIFY ) ROCKSDB_PERFCONTEXT_ENABLE = deterministicRandom()->coinflip() ? false : true;
init( ROCKSDB_PERFCONTEXT_SAMPLE_RATE, 0.0001 );

// Leader election
bool longLeaderElection = randomize && BUGGIFY;
@@ -579,6 +582,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi

init( MIN_AVAILABLE_SPACE, 1e8 );
init( MIN_AVAILABLE_SPACE_RATIO, 0.05 );
init( MIN_AVAILABLE_SPACE_RATIO_SAFETY_BUFFER, 0.01 );
init( TARGET_AVAILABLE_SPACE_RATIO, 0.30 );
init( AVAILABLE_SPACE_UPDATE_DELAY, 5.0 );
@@ -106,6 +106,8 @@ public:
double PUSH_STATS_SLOW_AMOUNT;
double PUSH_STATS_SLOW_RATIO;
int TLOG_POP_BATCH_SIZE;
bool PEEK_BATCHING_EMPTY_MSG;
double PEEK_BATCHING_EMPTY_MSG_INTERVAL;

// Data distribution queue
double HEALTH_POLL_TIME;
@@ -293,6 +295,8 @@ public:
bool ROCKSDB_READ_RANGE_REUSE_ITERATORS;
int64_t ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC;
bool ROCKSDB_WRITE_RATE_LIMITER_AUTO_TUNE;
bool ROCKSDB_PERFCONTEXT_ENABLE; // Enable rocks perf context metrics. May cause performance overhead
double ROCKSDB_PERFCONTEXT_SAMPLE_RATE;

// Leader election
int MAX_NOTIFICATIONS;
@@ -525,6 +529,7 @@ public:

int64_t MIN_AVAILABLE_SPACE;
double MIN_AVAILABLE_SPACE_RATIO;
double MIN_AVAILABLE_SPACE_RATIO_SAFETY_BUFFER;
double TARGET_AVAILABLE_SPACE_RATIO;
double AVAILABLE_SPACE_UPDATE_DELAY;
@ -1628,8 +1628,9 @@ Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw, K
|
|||
|
||||
ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
|
||||
state Reference<IQuorumChange> change;
|
||||
state std::vector<NetworkAddress> addressesVec;
|
||||
state std::vector<std::string> process_address_strs;
|
||||
state ClusterConnectionString
|
||||
conn; // We don't care about the Key here, it will be overrode in changeQuorumChecker().
|
||||
state std::vector<std::string> process_address_or_hostname_strs;
|
||||
state Optional<std::string> msg;
|
||||
state int index;
|
||||
state bool parse_error = false;
|
||||
|
@ -1640,38 +1641,45 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
|
|||
if (processes_entry.first) {
|
||||
ASSERT(processes_entry.second.present()); // no clear should be seen here
|
||||
auto processesStr = processes_entry.second.get().toString();
|
||||
boost::split(process_address_strs, processesStr, [](char c) { return c == ','; });
|
||||
if (!process_address_strs.size()) {
|
||||
boost::split(process_address_or_hostname_strs, processesStr, [](char c) { return c == ','; });
|
||||
if (!process_address_or_hostname_strs.size()) {
|
||||
return ManagementAPIError::toJsonString(
|
||||
false,
|
||||
"coordinators",
|
||||
"New coordinators\' processes are empty, please specify new processes\' network addresses with format "
|
||||
"\"IP:PORT,IP:PORT,...,IP:PORT\"");
|
||||
"\"IP:PORT,IP:PORT,...,IP:PORT\" or \"HOSTNAME:PORT,HOSTNAME:PORT,...,HOSTNAME:PORT\"");
|
||||
}
|
||||
for (index = 0; index < process_address_strs.size(); index++) {
|
||||
for (index = 0; index < process_address_or_hostname_strs.size(); index++) {
|
||||
try {
|
||||
auto a = NetworkAddress::parse(process_address_strs[index]);
|
||||
if (!a.isValid())
|
||||
parse_error = true;
|
||||
else
|
||||
addressesVec.push_back(a);
|
||||
if (Hostname::isHostname(process_address_or_hostname_strs[index])) {
|
||||
conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index]));
|
||||
conn.status = ClusterConnectionString::ConnectionStringStatus::UNRESOLVED;
|
||||
} else {
|
||||
NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]);
|
||||
if (!a.isValid()) {
|
||||
parse_error = true;
|
||||
} else {
|
||||
conn.coords.push_back(a);
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevDebug, "SpecialKeysNetworkParseError").error(e);
|
||||
parse_error = true;
|
||||
}
|
||||
|
||||
if (parse_error) {
|
||||
std::string error =
|
||||
"ERROR: \'" + process_address_strs[index] + "\' is not a valid network endpoint address\n";
|
||||
if (process_address_strs[index].find(":tls") != std::string::npos)
|
||||
std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] +
|
||||
"\' is not a valid network endpoint address\n";
|
||||
if (process_address_or_hostname_strs[index].find(":tls") != std::string::npos)
|
||||
error += " Do not include the `:tls' suffix when naming a process\n";
|
||||
return ManagementAPIError::toJsonString(false, "coordinators", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (addressesVec.size())
|
||||
change = specifiedQuorumChange(addressesVec);
|
||||
wait(conn.resolveHostnames());
|
||||
if (conn.coordinators().size())
|
||||
change = specifiedQuorumChange(conn.coordinators());
|
||||
else
|
||||
change = noQuorumChange();
|
||||
|
||||
|
@@ -1693,10 +1701,11 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
ASSERT(change.isValid());

TraceEvent(SevDebug, "SKSChangeCoordinatorsStart")
.detail("NewAddresses", describe(addressesVec))
.detail("NewHostnames", conn.hostnames.size() ? describe(conn.hostnames) : "N/A")
.detail("NewAddresses", describe(conn.coordinators()))
.detail("Description", entry.first ? entry.second.get().toString() : "");

Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, &addressesVec));
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, &conn));

TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish")
.detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success
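
Note: the hunks above change the special-key coordinators/processes parser so that each comma-separated entry may be either an IP:PORT address or a HOSTNAME:PORT pair; hostnames are stored on the connection string unresolved and only looked up later via conn.resolveHostnames(). As a rough standalone sketch of that split (the looksLikeHostname helper below is a made-up stand-in, not FoundationDB's Hostname::isHostname or NetworkAddress::parse):

#include <cctype>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-in for Hostname::isHostname: treat an entry as a hostname
// when anything before the last ':' contains an alphabetic character.
static bool looksLikeHostname(const std::string& entry) {
    auto colon = entry.rfind(':');
    std::string host = entry.substr(0, colon == std::string::npos ? entry.size() : colon);
    for (char c : host)
        if (std::isalpha(static_cast<unsigned char>(c)))
            return true;
    return false;
}

int main() {
    std::string processesStr = "10.0.0.1:4500,coord1.example.com:4500";

    // Same comma split the actor performs with boost::split.
    std::vector<std::string> entries;
    std::stringstream ss(processesStr);
    for (std::string item; std::getline(ss, item, ',');)
        entries.push_back(item);

    for (const auto& e : entries) {
        if (looksLikeHostname(e))
            std::cout << e << " -> hostname, kept unresolved for later lookup\n";
        else
            std::cout << e << " -> network address, usable immediately\n";
    }
}

In the real actor an invalid entry sets parse_error and produces the ManagementAPIError message shown above.
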
@@ -306,6 +306,7 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
bool* quorum_reachable,
int* coordinatorsFaultTolerance) {
try {
wait(connRecord->resolveHostnames());
state ClientCoordinators coord(connRecord);
state StatusObject statusObj;

@@ -35,3 +35,13 @@ add_custom_target(start_sandbox
--lockfile ${CMAKE_BINARY_DIR}/sandbox/fdbmonitor.lock)

add_dependencies(start_sandbox fdbmonitor fdbserver)

if(NOT EXISTS ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh)
configure_file(${CMAKE_SOURCE_DIR}/contrib/generate_profile.sh
${CMAKE_BINARY_DIR}/contrib/generate_profile.sh)
endif()

add_custom_target(generate_profile
COMMAND ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh ${CMAKE_BINARY_DIR})

add_dependencies(generate_profile fdbmonitor fdbserver mako fdbcli)
@ -260,7 +260,7 @@ public:
|
|||
std::string currentFilename =
|
||||
(wrappedFile.isReady() && !wrappedFile.isError()) ? wrappedFile.get()->getFilename() : actualFilename;
|
||||
currentProcess->machine->openFiles.erase(currentFilename);
|
||||
//TraceEvent("AsyncFileNonDurableOpenError").error(e, true).detail("Filename", filename).detail("Address", currentProcess->address).detail("Addr", g_simulator.getCurrentProcess()->address);
|
||||
//TraceEvent("AsyncFileNonDurableOpenError").errorUnsuppressed(e).detail("Filename", filename).detail("Address", currentProcess->address).detail("Addr", g_simulator.getCurrentProcess()->address);
|
||||
wait(g_simulator.onProcess(currentProcess, currentTaskID));
|
||||
throw err;
|
||||
}
|
||||
|
|
|
@ -732,13 +732,13 @@ ACTOR Future<Void> connectionKeeper(Reference<Peer> self,
|
|||
|
||||
if (self->compatible) {
|
||||
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID())
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("PeerAddr", self->destination);
|
||||
} else {
|
||||
TraceEvent(
|
||||
ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed", conn ? conn->getDebugID() : UID())
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("PeerAddr", self->destination);
|
||||
}
|
||||
|
@ -783,7 +783,7 @@ ACTOR Future<Void> connectionKeeper(Reference<Peer> self,
|
|||
|
||||
if (self->peerReferences <= 0 && self->reliable.empty() && self->unsent.empty() &&
|
||||
self->outstandingReplies == 0) {
|
||||
TraceEvent("PeerDestroy").error(e).suppressFor(1.0).detail("PeerAddr", self->destination);
|
||||
TraceEvent("PeerDestroy").errorUnsuppressed(e).suppressFor(1.0).detail("PeerAddr", self->destination);
|
||||
self->connect.cancel();
|
||||
self->transport->peers.erase(self->destination);
|
||||
self->transport->orderedAddresses.erase(self->destination);
|
||||
|
@ -1330,10 +1330,12 @@ ACTOR static Future<Void> connectionIncoming(TransportData* self, Reference<ICon
|
|||
}
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
TraceEvent("IncomingConnectionError", conn->getDebugID())
|
||||
.error(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("FromAddress", conn->getPeerAddress());
|
||||
if (e.code() != error_code_actor_cancelled) {
|
||||
TraceEvent("IncomingConnectionError", conn->getDebugID())
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("FromAddress", conn->getPeerAddress());
|
||||
}
|
||||
conn->close();
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@@ -29,12 +29,12 @@ void HealthMonitor::reportPeerClosed(const NetworkAddress& peerAddress) {
}

void HealthMonitor::purgeOutdatedHistory() {
for (auto it = peerClosedHistory.begin(); it != peerClosedHistory.end();) {
if (it->first < now() - FLOW_KNOBS->HEALTH_MONITOR_CLIENT_REQUEST_INTERVAL_SECS) {
auto& count = peerClosedNum[it->second];
while (!peerClosedHistory.empty()) {
auto const& p = peerClosedHistory.front();
if (p.first < now() - FLOW_KNOBS->HEALTH_MONITOR_CLIENT_REQUEST_INTERVAL_SECS) {
auto& count = peerClosedNum[p.second];
--count;
ASSERT(count >= 0);
++it; // Increment before pop_front to avoid iterator invalidation
peerClosedHistory.pop_front();
} else {
break;
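
Note: the purgeOutdatedHistory change above rewrites the purge loop to pop expired entries off the front of the deque until it reaches one that is still inside the window, dropping the old increment-before-pop_front iterator workaround. A minimal sketch of the same pattern, with made-up stand-ins for peerClosedHistory and peerClosedNum:

#include <deque>
#include <iostream>
#include <map>
#include <string>
#include <utility>

int main() {
    double now = 100.0;
    double windowSecs = 30.0;

    // (closeTime, peer) pairs, oldest first, plus a per-peer counter kept in sync.
    std::deque<std::pair<double, std::string>> history = {
        { 50.0, "10.0.0.1:4500" }, { 65.0, "10.0.0.2:4500" }, { 95.0, "10.0.0.1:4500" }
    };
    std::map<std::string, int> closedCount = { { "10.0.0.1:4500", 2 }, { "10.0.0.2:4500", 1 } };

    // Pop expired entries from the front; stop at the first entry still in the window.
    while (!history.empty()) {
        const auto& p = history.front();
        if (p.first < now - windowSecs) {
            --closedCount[p.second];
            history.pop_front();
        } else {
            break;
        }
    }

    for (const auto& [peer, n] : closedCount)
        std::cout << peer << " closed " << n << " time(s) in the last " << windowSecs << "s\n";
}
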
@ -1123,11 +1123,9 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> runLoop(Sim2* self) {
|
||||
state ISimulator::ProcessInfo* callingMachine = self->currentProcess;
|
||||
static void runLoop(Sim2* self) {
|
||||
ISimulator::ProcessInfo* callingMachine = self->currentProcess;
|
||||
while (!self->isStopped) {
|
||||
wait(self->net2->yield(TaskPriority::DefaultYield));
|
||||
|
||||
self->mutex.enter();
|
||||
if (self->tasks.size() == 0) {
|
||||
self->mutex.leave();
|
||||
|
@ -1144,18 +1142,13 @@ public:
|
|||
self->yielded = false;
|
||||
}
|
||||
self->currentProcess = callingMachine;
|
||||
self->net2->stop();
|
||||
for (auto& fn : self->stopCallbacks) {
|
||||
fn();
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Implement ISimulator interface
|
||||
void run() override {
|
||||
Future<Void> loopFuture = runLoop(this);
|
||||
net2->run();
|
||||
}
|
||||
void run() override { runLoop(this); }
|
||||
ProcessInfo* newProcess(const char* name,
|
||||
IPAddress ip,
|
||||
uint16_t port,
|
||||
|
@ -2094,7 +2087,7 @@ public:
|
|||
t.action.send(Void());
|
||||
ASSERT(this->currentProcess == t.machine);
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "UnhandledSimulationEventError").error(e, true);
|
||||
TraceEvent(SevError, "UnhandledSimulationEventError").errorUnsuppressed(e);
|
||||
killProcess(t.machine, KillInstantly);
|
||||
}
|
||||
|
||||
|
|
|
@ -1101,10 +1101,10 @@ ACTOR Future<Void> backupWorker(BackupInterface interf,
|
|||
try {
|
||||
wait(done);
|
||||
} catch (Error& e) {
|
||||
TraceEvent("BackupWorkerShutdownError", self.myId).error(e, true);
|
||||
TraceEvent("BackupWorkerShutdownError", self.myId).errorUnsuppressed(e);
|
||||
}
|
||||
}
|
||||
TraceEvent("BackupWorkerTerminated", self.myId).error(err, true);
|
||||
TraceEvent("BackupWorkerTerminated", self.myId).errorUnsuppressed(err);
|
||||
if (err.code() != error_code_actor_cancelled && err.code() != error_code_worker_removed) {
|
||||
throw err;
|
||||
}
|
||||
|
|
|
@ -843,8 +843,8 @@ ACTOR Future<Void> monitorBlobWorkerStatus(BlobManagerData* bmData, BlobWorkerIn
|
|||
}
|
||||
// TODO change back from SevError?
|
||||
TraceEvent(SevError, "BWStatusMonitoringFailed", bmData->id)
|
||||
.detail("BlobWorkerID", bwInterf.id())
|
||||
.error(e);
|
||||
.error(e)
|
||||
.detail("BlobWorkerID", bwInterf.id());
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
@ -877,7 +877,7 @@ ACTOR Future<Void> monitorBlobWorker(BlobManagerData* bmData, BlobWorkerInterfac
|
|||
printf("BM got unexpected error %s monitoring BW %s\n", e.name(), bwInterf.id().toString().c_str());
|
||||
}
|
||||
// TODO change back from SevError?
|
||||
TraceEvent(SevError, "BWMonitoringFailed", bmData->id).detail("BlobWorkerID", bwInterf.id()).error(e);
|
||||
TraceEvent(SevError, "BWMonitoringFailed", bmData->id).error(e).detail("BlobWorkerID", bwInterf.id());
|
||||
throw e;
|
||||
}
|
||||
|
||||
|
@ -1152,7 +1152,7 @@ ACTOR Future<Void> blobManager(BlobManagerInterface bmInterf,
|
|||
}
|
||||
}
|
||||
} catch (Error& err) {
|
||||
TraceEvent("BlobManagerDied", bmInterf.id()).error(err, true);
|
||||
TraceEvent("BlobManagerDied", bmInterf.id()).errorUnsuppressed(err);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -1589,7 +1589,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
metadata->keyRange.end.printable().c_str(),
|
||||
e.name());
|
||||
}
|
||||
TraceEvent(SevWarn, "GranuleFileUpdaterError", bwData->id).detail("Granule", metadata->keyRange).error(e);
|
||||
TraceEvent(SevWarn, "GranuleFileUpdaterError", bwData->id).error(e).detail("Granule", metadata->keyRange);
|
||||
|
||||
if (granuleCanRetry(e)) {
|
||||
// explicitly cancel all outstanding write futures BEFORE updating promise stream, to ensure they
|
||||
|
@ -2621,7 +2621,7 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
|
|||
if (BW_DEBUG) {
|
||||
printf("Blob worker got error %s. Exiting...\n", e.name());
|
||||
}
|
||||
TraceEvent("BlobWorkerDied", self->id).error(e, true);
|
||||
TraceEvent("BlobWorkerDied", self->id).errorUnsuppressed(e);
|
||||
}
|
||||
|
||||
wait(self->granuleMetadata.clearAsync());
|
||||
|
|
|
@ -90,6 +90,7 @@ set(FDBSERVER_SRCS
|
|||
QuietDatabase.actor.cpp
|
||||
QuietDatabase.h
|
||||
RadixTree.h
|
||||
Ratekeeper.h
|
||||
Ratekeeper.actor.cpp
|
||||
RatekeeperInterface.h
|
||||
RecoveryState.h
|
||||
|
@ -130,6 +131,8 @@ set(FDBSERVER_SRCS
|
|||
storageserver.actor.cpp
|
||||
TagPartitionedLogSystem.actor.cpp
|
||||
TagPartitionedLogSystem.actor.h
|
||||
TagThrottler.actor.cpp
|
||||
TagThrottler.h
|
||||
template_fdb.h
|
||||
TCInfo.actor.cpp
|
||||
TCInfo.h
|
||||
|
|
|
@ -296,7 +296,7 @@ ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
|
|||
TraceEvent(SevWarn, "DetectedFailedRecovery", cluster->id).detail("OldMaster", iMaster.id());
|
||||
} catch (Error& e) {
|
||||
state Error err = e;
|
||||
TraceEvent("CCWDB", cluster->id).error(e, true).detail("Master", iMaster.id());
|
||||
TraceEvent("CCWDB", cluster->id).errorUnsuppressed(e).detail("Master", iMaster.id());
|
||||
if (e.code() != error_code_actor_cancelled)
|
||||
wait(delay(0.0));
|
||||
|
||||
|
@ -313,7 +313,7 @@ ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
|
|||
TEST(err.code() == error_code_restart_cluster_controller); // Terminated due to cluster-controller restart.
|
||||
|
||||
if (cluster->shouldCommitSuicide || err.code() == error_code_coordinators_changed) {
|
||||
TraceEvent("ClusterControllerTerminate", cluster->id).error(err, true);
|
||||
TraceEvent("ClusterControllerTerminate", cluster->id).errorUnsuppressed(err);
|
||||
throw restart_cluster_controller();
|
||||
}
|
||||
|
||||
|
@ -427,10 +427,10 @@ void checkOutstandingStorageRequests(ClusterControllerData* self) {
|
|||
} catch (Error& e) {
|
||||
if (e.code() == error_code_no_more_servers) {
|
||||
TraceEvent(SevWarn, "RecruitStorageNotAvailable", self->id)
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("OutstandingReq", i)
|
||||
.detail("IsCriticalRecruitment", req.first.criticalRecruitment)
|
||||
.error(e);
|
||||
.detail("IsCriticalRecruitment", req.first.criticalRecruitment);
|
||||
} else {
|
||||
TraceEvent(SevError, "RecruitStorageError", self->id).error(e);
|
||||
throw;
|
||||
|
@ -464,9 +464,9 @@ void checkOutstandingBlobWorkerRequests(ClusterControllerData* self) {
|
|||
} catch (Error& e) {
|
||||
if (e.code() == error_code_no_more_servers) {
|
||||
TraceEvent(SevWarn, "RecruitBlobWorkerNotAvailable", self->id)
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("OutstandingReq", i)
|
||||
.error(e);
|
||||
.detail("OutstandingReq", i);
|
||||
} else {
|
||||
TraceEvent(SevError, "RecruitBlobWorkerError", self->id).error(e);
|
||||
throw;
|
||||
|
@ -876,8 +876,8 @@ void clusterRecruitStorage(ClusterControllerData* self, RecruitStorageRequest re
|
|||
if (e.code() == error_code_no_more_servers) {
|
||||
self->outstandingStorageRequests.emplace_back(req, now() + SERVER_KNOBS->RECRUITMENT_TIMEOUT);
|
||||
TraceEvent(SevWarn, "RecruitStorageNotAvailable", self->id)
|
||||
.detail("IsCriticalRecruitment", req.criticalRecruitment)
|
||||
.error(e);
|
||||
.error(e)
|
||||
.detail("IsCriticalRecruitment", req.criticalRecruitment);
|
||||
} else {
|
||||
TraceEvent(SevError, "RecruitStorageError", self->id).error(e);
|
||||
throw; // Any other error will bring down the cluster controller
|
||||
|
@ -2599,6 +2599,7 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
|
|||
state bool hasConnected = false;
|
||||
loop {
|
||||
try {
|
||||
wait(connRecord->resolveHostnames());
|
||||
ServerCoordinators coordinators(connRecord);
|
||||
wait(clusterController(coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
|
||||
} catch (Error& e) {
|
||||
|
|
|
@ -1896,8 +1896,8 @@ public:
|
|||
throw;
|
||||
}
|
||||
TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDc", id)
|
||||
.detail("SetPrimaryDesired", setPrimaryDesired)
|
||||
.error(e);
|
||||
.error(e)
|
||||
.detail("SetPrimaryDesired", setPrimaryDesired);
|
||||
auto reply = findWorkersForConfigurationFromDC(req, regions[1].dcId, checkGoodRecruitment);
|
||||
if (!setPrimaryDesired) {
|
||||
std::vector<Optional<Key>> dcPriority;
|
||||
|
|
|
@ -673,7 +673,9 @@ ACTOR Future<Void> changeCoordinators(Reference<ClusterRecoveryData> self) {
|
|||
}
|
||||
|
||||
try {
|
||||
wait(self->cstate.move(ClusterConnectionString(changeCoordinatorsRequest.newConnectionString.toString())));
|
||||
state ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
|
||||
wait(conn.resolveHostnames());
|
||||
wait(self->cstate.move(conn));
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_actor_cancelled)
|
||||
changeCoordinatorsRequest.reply.sendError(e);
|
||||
|
|
|
@ -1769,17 +1769,17 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
|
|||
wait(throwErrorOr(ddSnapReq));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("SnapCommitProxy_DDSnapResponseError")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("SnapPayload", snapReq.snapPayload)
|
||||
.detail("SnapUID", snapReq.snapUID)
|
||||
.error(e, true /*includeCancelled*/);
|
||||
.detail("SnapUID", snapReq.snapUID);
|
||||
throw e;
|
||||
}
|
||||
snapReq.reply.send(Void());
|
||||
} catch (Error& e) {
|
||||
TraceEvent("SnapCommitProxy_SnapReqError")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("SnapPayload", snapReq.snapPayload)
|
||||
.detail("SnapUID", snapReq.snapUID)
|
||||
.error(e, true /*includeCancelled*/);
|
||||
.detail("SnapUID", snapReq.snapUID);
|
||||
if (e.code() != error_code_operation_cancelled) {
|
||||
snapReq.reply.sendError(e);
|
||||
} else {
|
||||
|
@ -2188,7 +2188,7 @@ ACTOR Future<Void> commitProxyServer(CommitProxyInterface proxy,
|
|||
whitelistBinPaths);
|
||||
wait(core || checkRemoved(db, req.recoveryCount, proxy));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("CommitProxyTerminated", proxy.id()).error(e, true);
|
||||
TraceEvent("CommitProxyTerminated", proxy.id()).errorUnsuppressed(e);
|
||||
|
||||
if (e.code() != error_code_worker_removed && e.code() != error_code_tlog_stopped &&
|
||||
e.code() != error_code_tlog_failed && e.code() != error_code_coordinators_changed &&
|
||||
|
|
|
@@ -61,7 +61,7 @@ class WriteToTransactionEnvironment {
Version lastWrittenVersion{ 0 };

static Value longToValue(int64_t v) {
auto s = format("%ld", v);
auto s = format("%lld", v);
return StringRef(reinterpret_cast<uint8_t const*>(s.c_str()), s.size());
}

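
Note: the one-line longToValue fix above is a portability fix: %ld expects a long, which is only 32 bits on LLP64 platforms such as Windows, so formatting an int64_t with it is undefined behavior there, while %lld (or PRId64 from <cinttypes>) matches a 64-bit value. A small standalone illustration:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    int64_t v = 6000000000LL; // does not fit in a 32-bit long

    char buf[32];
    std::snprintf(buf, sizeof(buf), "%lld", static_cast<long long>(v)); // form used in the fix
    std::printf("as %%lld:   %s\n", buf);

    std::snprintf(buf, sizeof(buf), "%" PRId64, v); // equivalent, via <cinttypes>
    std::printf("as PRId64: %s\n", buf);
}
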
|
@ -96,6 +96,7 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local) : Client
|
|||
}
|
||||
|
||||
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
|
||||
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
|
||||
ClusterConnectionString cs = ccr->getConnectionString();
|
||||
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) {
|
||||
leaderElectionServers.emplace_back(*s);
|
||||
|
@ -205,8 +206,11 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
|||
int* clientCount,
|
||||
Reference<AsyncVar<bool>> hasConnectedClients,
|
||||
OpenDatabaseCoordRequest req,
|
||||
Future<Void> checkStuck) {
|
||||
Future<Void> checkStuck,
|
||||
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
||||
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
|
||||
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||
state Future<Void> clientInfoOnChange = db->clientInfo->onChange();
|
||||
|
||||
++(*clientCount);
|
||||
hasConnectedClients->set(true);
|
||||
|
@ -223,7 +227,15 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
|||
replyContents = failed_to_progress();
|
||||
break;
|
||||
}
|
||||
when(wait(yieldedFuture(db->clientInfo->onChange()))) { replyContents = db->clientInfo->get(); }
|
||||
when(wait(yieldedFuture(clientInfoOnChange))) {
|
||||
clientInfoOnChange = db->clientInfo->onChange();
|
||||
replyContents = db->clientInfo->get();
|
||||
}
|
||||
when(wait(coordinatorsChangedOnChange)) {
|
||||
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||
replyContents = coordinators_changed();
|
||||
break;
|
||||
}
|
||||
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
|
||||
if (db->clientInfo->get().read().id.isValid()) {
|
||||
replyContents = db->clientInfo->get();
|
||||
|
@ -254,18 +266,33 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
|||
ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
|
||||
Reference<AsyncVar<bool>> hasConnectedClients,
|
||||
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
|
||||
ElectionResultRequest req) {
|
||||
ElectionResultRequest req,
|
||||
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
||||
state bool coordinatorsChangeDetected = false;
|
||||
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||
state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
||||
++(*clientCount);
|
||||
hasConnectedClients->set(true);
|
||||
|
||||
while (!currentElectedLeader->get().present() || req.knownLeader == currentElectedLeader->get().get().changeID) {
|
||||
choose {
|
||||
when(wait(yieldedFuture(currentElectedLeader->onChange()))) {}
|
||||
when(wait(yieldedFuture(currentElectedLeaderOnChange))) {
|
||||
currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
||||
}
|
||||
when(wait(coordinatorsChangedOnChange)) {
|
||||
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||
coordinatorsChangeDetected = true;
|
||||
break;
|
||||
}
|
||||
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
req.reply.send(currentElectedLeader->get());
|
||||
if (coordinatorsChangeDetected) {
|
||||
req.reply.sendError(coordinators_changed());
|
||||
} else {
|
||||
req.reply.send(currentElectedLeader->get());
|
||||
}
|
||||
|
||||
if (--(*clientCount) == 0) {
|
||||
hasConnectedClients->set(false);
|
||||
|
@ -296,6 +323,9 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
|||
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
|
||||
makeReference<AsyncVar<Optional<LeaderInfo>>>();
|
||||
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
|
||||
state Reference<AsyncVar<Void>> coordinatorsChanged = makeReference<AsyncVar<Void>>();
|
||||
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||
state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange();
|
||||
|
||||
loop choose {
|
||||
when(OpenDatabaseCoordRequest req = waitNext(interf.openDatabase.getFuture())) {
|
||||
|
@ -306,10 +336,14 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
|||
} else {
|
||||
if (!leaderMon.isValid()) {
|
||||
leaderMon = monitorLeaderAndGetClientInfo(
|
||||
req.clusterKey, req.coordinators, &clientData, currentElectedLeader);
|
||||
req.clusterKey, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
|
||||
}
|
||||
actors.add(
|
||||
openDatabase(&clientData, &clientCount, hasConnectedClients, req, canConnectToLeader.checkStuck()));
|
||||
actors.add(openDatabase(&clientData,
|
||||
&clientCount,
|
||||
hasConnectedClients,
|
||||
req,
|
||||
canConnectToLeader.checkStuck(),
|
||||
coordinatorsChanged));
|
||||
}
|
||||
}
|
||||
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
|
||||
|
@ -318,10 +352,11 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
|||
req.reply.send(currentElectedLeader->get());
|
||||
} else {
|
||||
if (!leaderMon.isValid()) {
|
||||
leaderMon =
|
||||
monitorLeaderAndGetClientInfo(req.key, req.coordinators, &clientData, currentElectedLeader);
|
||||
leaderMon = monitorLeaderAndGetClientInfo(
|
||||
req.key, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
|
||||
}
|
||||
actors.add(remoteMonitorLeader(&clientCount, hasConnectedClients, currentElectedLeader, req));
|
||||
actors.add(remoteMonitorLeader(
|
||||
&clientCount, hasConnectedClients, currentElectedLeader, req, coordinatorsChanged));
|
||||
}
|
||||
}
|
||||
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
|
||||
|
@ -454,13 +489,18 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
|||
notify.pop_front();
|
||||
}
|
||||
}
|
||||
when(wait(hasConnectedClients->onChange())) {
|
||||
when(wait(hasConnectedClientsOnChange)) {
|
||||
hasConnectedClientsOnChange = hasConnectedClients->onChange();
|
||||
if (!hasConnectedClients->get() && !nextInterval.isValid()) {
|
||||
TraceEvent("LeaderRegisterUnneeded").detail("Key", key);
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
when(wait(actors.getResult())) {}
|
||||
when(wait(coordinatorsChangedOnChange)) {
|
||||
leaderMon = Future<Void>();
|
||||
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
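
Note: several of the Coordination.actor.cpp hunks above share one pattern: an onChange() future fires only once, so after a when() branch consumes it the actor must request a fresh future before the next loop iteration. The diff therefore hoists these futures into named state variables (clientInfoOnChange, coordinatorsChangedOnChange, currentElectedLeaderOnChange, hasConnectedClientsOnChange) and reassigns them inside the branch that fired. A toy stand-in (not flow's AsyncVar) showing why a one-shot subscription has to re-arm itself:

#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Toy stand-in for an async variable: set() fires each registered callback exactly once.
template <class T>
class OneShotVar {
    T value;
    std::vector<std::function<void(const T&)>> waiters;
public:
    explicit OneShotVar(T v) : value(std::move(v)) {}
    const T& get() const { return value; }
    void onChange(std::function<void(const T&)> cb) { waiters.push_back(std::move(cb)); }
    void set(T v) {
        value = std::move(v);
        auto fired = std::move(waiters); // each subscription fires exactly once
        waiters.clear();
        for (auto& cb : fired)
            cb(value);
    }
};

int main() {
    OneShotVar<std::string> leader("none");

    // Because each subscription is one-shot, the handler must re-subscribe after it
    // fires, which is the same reason the diff reassigns the *OnChange futures
    // inside each when() branch.
    std::function<void(const std::string&)> watch = [&](const std::string& v) {
        std::cout << "leader changed to " << v << "\n";
        leader.onChange(watch); // re-arm for the next change
    };
    leader.onChange(watch);

    leader.set("coordinator-1");
    leader.set("coordinator-2"); // still observed, because the handler re-armed itself
}
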
@ -756,7 +796,7 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder,
|
|||
store.getError() || configDatabaseServer);
|
||||
throw internal_error();
|
||||
} catch (Error& e) {
|
||||
TraceEvent("CoordinationServerError", myID).error(e, true);
|
||||
TraceEvent("CoordinationServerError", myID).errorUnsuppressed(e);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -178,7 +178,7 @@ class WorkPool final : public IThreadPool, public ReferenceCounted<WorkPool<Thre
|
|||
stopped.send(Void());
|
||||
return;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("WorkPoolError").error(e, true);
|
||||
TraceEvent("WorkPoolError").errorUnsuppressed(e);
|
||||
error.sendError(e);
|
||||
} catch (...) {
|
||||
TraceEvent("WorkPoolError").log();
|
||||
|
@ -256,10 +256,10 @@ public:
|
|||
|
||||
pool->queueLock.enter();
|
||||
TraceEvent("WorkPool_Stop")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Workers", pool->workers.size())
|
||||
.detail("Idle", pool->idle.size())
|
||||
.detail("Work", pool->work.size())
|
||||
.error(e, true);
|
||||
.detail("Work", pool->work.size());
|
||||
|
||||
for (uint32_t i = 0; i < pool->work.size(); i++)
|
||||
pool->work[i]->cancel(); // What if cancel() does something to this?
|
||||
|
|
|
@ -154,7 +154,7 @@ class WorkPool final : public IThreadPool, public ReferenceCounted<WorkPool<Thre
|
|||
stopped.send(Void());
|
||||
return;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("WorkPoolError").error(e, true);
|
||||
TraceEvent("WorkPoolError").errorUnsuppressed(e);
|
||||
error.sendError(e);
|
||||
} catch (...) {
|
||||
TraceEvent("WorkPoolError").log();
|
||||
|
@ -232,10 +232,10 @@ public:
|
|||
|
||||
pool->queueLock.enter();
|
||||
TraceEvent("WorkPool_Stop")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Workers", pool->workers.size())
|
||||
.detail("Idle", pool->idle.size())
|
||||
.detail("Work", pool->work.size())
|
||||
.error(e, true);
|
||||
.detail("Work", pool->work.size());
|
||||
|
||||
for (uint32_t i = 0; i < pool->work.size(); i++)
|
||||
pool->work[i]->cancel(); // What if cancel() does something to this?
|
||||
|
|
File diff suppressed because it is too large
@ -171,6 +171,7 @@ typedef AsyncMap<UID, ServerStatus> ServerStatusMap;
|
|||
|
||||
class DDTeamCollection : public ReferenceCounted<DDTeamCollection> {
|
||||
friend class DDTeamCollectionImpl;
|
||||
friend class DDTeamCollectionUnitTest;
|
||||
|
||||
enum class Status { NONE = 0, WIGGLING = 1, EXCLUDED = 2, FAILED = 3 };
|
||||
|
||||
|
@ -521,6 +522,37 @@ class DDTeamCollection : public ReferenceCounted<DDTeamCollection> {
|
|||
|
||||
void noHealthyTeams() const;
|
||||
|
||||
// To enable verbose debug info, set shouldPrint to true
|
||||
void traceAllInfo(bool shouldPrint = false) const;
|
||||
|
||||
// Check if the server belongs to a machine; if not, create the machine.
|
||||
// Establish the two-direction link between server and machine
|
||||
Reference<TCMachineInfo> checkAndCreateMachine(Reference<TCServerInfo> server);
|
||||
|
||||
// Group storage servers (process) based on their machineId in LocalityData
|
||||
// All created machines are healthy
|
||||
// Return The number of healthy servers we grouped into machines
|
||||
int constructMachinesFromServers();
|
||||
|
||||
// Create machineTeamsToBuild number of machine teams
|
||||
// No operation if machineTeamsToBuild is 0
|
||||
// Note: The creation of machine teams should not depend on server teams:
|
||||
// No matter how server teams will be created, we will create the same set of machine teams;
|
||||
// We should never use server team number in building machine teams.
|
||||
//
|
||||
// Five steps to create each machine team, which are documented in the function
|
||||
// Reuse ReplicationPolicy selectReplicas func to select machine team
|
||||
// return number of added machine teams
|
||||
int addBestMachineTeams(int machineTeamsToBuild);
|
||||
|
||||
// Sanity check the property of teams in unit test
|
||||
// Return true if all server teams belong to machine teams
|
||||
bool sanityCheckTeams() const;
|
||||
|
||||
void disableBuildingTeams() { doBuildTeams = false; }
|
||||
|
||||
void setCheckTeamDelay() { this->checkTeamDelay = Void(); }
|
||||
|
||||
public:
|
||||
Database cx;
|
||||
|
||||
|
@ -595,39 +627,6 @@ public:
|
|||
|
||||
void addTeam(std::set<UID> const& team, bool isInitialTeam) { addTeam(team.begin(), team.end(), isInitialTeam); }
|
||||
|
||||
// FIXME: Public for testing only
|
||||
void disableBuildingTeams() { doBuildTeams = false; }
|
||||
|
||||
// FIXME: Public for testing only
|
||||
void setCheckTeamDelay() { this->checkTeamDelay = Void(); }
|
||||
|
||||
// FIXME: Public for testing only
|
||||
// Group storage servers (process) based on their machineId in LocalityData
|
||||
// All created machines are healthy
|
||||
// Return The number of healthy servers we grouped into machines
|
||||
int constructMachinesFromServers();
|
||||
|
||||
// FIXME: Public for testing only
|
||||
// To enable verbose debug info, set shouldPrint to true
|
||||
void traceAllInfo(bool shouldPrint = false) const;
|
||||
|
||||
// FIXME: Public for testing only
|
||||
// Create machineTeamsToBuild number of machine teams
|
||||
// No operation if machineTeamsToBuild is 0
|
||||
// Note: The creation of machine teams should not depend on server teams:
|
||||
// No matter how server teams will be created, we will create the same set of machine teams;
|
||||
// We should never use server team number in building machine teams.
|
||||
//
|
||||
// Five steps to create each machine team, which are documented in the function
|
||||
// Reuse ReplicationPolicy selectReplicas func to select machine team
|
||||
// return number of added machine teams
|
||||
int addBestMachineTeams(int machineTeamsToBuild);
|
||||
|
||||
// FIXME: Public for testing only
|
||||
// Sanity check the property of teams in unit test
|
||||
// Return true if all server teams belong to machine teams
|
||||
bool sanityCheckTeams() const;
|
||||
|
||||
// Create server teams based on machine teams
|
||||
// Before the number of machine teams reaches the threshold, build a machine team for each server team
|
||||
// When it reaches the threshold, first try to build a server team with existing machine teams; if failed,
|
||||
|
@ -642,11 +641,6 @@ public:
|
|||
|
||||
bool removeTeam(Reference<TCTeamInfo> team);
|
||||
|
||||
// FIXME: Public for testing only
|
||||
// Check if the server belongs to a machine; if not, create the machine.
|
||||
// Establish the two-direction link between server and machine
|
||||
Reference<TCMachineInfo> checkAndCreateMachine(Reference<TCServerInfo> server);
|
||||
|
||||
void removeTSS(UID removedServer);
|
||||
|
||||
void removeServer(UID removedServer);
|
||||
|
|
|
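
Note: the comments moved above describe constructMachinesFromServers as grouping storage servers by the machineId in their locality, creating the machine record the first time that id is seen and wiring the two-way server/machine link. A rough sketch of just that grouping step, using hypothetical ServerInfo/MachineInfo structs rather than the real TCServerInfo/TCMachineInfo:

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct MachineInfo;

struct ServerInfo {
    std::string id;
    std::string machineId;  // comes from the server's LocalityData in the real code
    MachineInfo* machine;   // back-pointer filled in during grouping
};

struct MachineInfo {
    std::string id;
    std::vector<std::shared_ptr<ServerInfo>> servers;
};

// Group servers by machineId, creating a machine entry on first sight and
// establishing the two-way server <-> machine link; returns servers grouped.
int constructMachinesFromServers(std::vector<std::shared_ptr<ServerInfo>>& servers,
                                 std::map<std::string, std::shared_ptr<MachineInfo>>& machines) {
    int grouped = 0;
    for (auto& s : servers) {
        auto& m = machines[s->machineId];
        if (!m)
            m = std::make_shared<MachineInfo>(MachineInfo{ s->machineId, {} });
        m->servers.push_back(s);
        s->machine = m.get();
        ++grouped;
    }
    return grouped;
}

int main() {
    std::vector<std::shared_ptr<ServerInfo>> servers = {
        std::make_shared<ServerInfo>(ServerInfo{ "ss1", "machineA", nullptr }),
        std::make_shared<ServerInfo>(ServerInfo{ "ss2", "machineA", nullptr }),
        std::make_shared<ServerInfo>(ServerInfo{ "ss3", "machineB", nullptr }),
    };
    std::map<std::string, std::shared_ptr<MachineInfo>> machines;
    std::cout << constructMachinesFromServers(servers, machines) << " servers grouped into "
              << machines.size() << " machines\n";
}
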
@ -865,7 +865,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
|
|||
}
|
||||
|
||||
bool ddEnabled = wait(isDataDistributionEnabled(cx, ddEnabledState));
|
||||
TraceEvent("DataDistributionMoveKeysConflict").detail("DataDistributionEnabled", ddEnabled).error(err);
|
||||
TraceEvent("DataDistributionMoveKeysConflict").error(err).detail("DataDistributionEnabled", ddEnabled);
|
||||
if (ddEnabled) {
|
||||
throw err;
|
||||
}
|
||||
|
@ -891,7 +891,7 @@ Future<Void> sendSnapReq(RequestStream<Req> stream, Req req, Error e) {
|
|||
ErrorOr<REPLY_TYPE(Req)> reply = wait(stream.tryGetReply(req));
|
||||
if (reply.isError()) {
|
||||
TraceEvent("SnapDataDistributor_ReqError")
|
||||
.error(reply.getError(), true)
|
||||
.errorUnsuppressed(reply.getError())
|
||||
.detail("ConvertedErrorType", e.what())
|
||||
.detail("Peer", stream.getEndpoint().getPrimaryAddress());
|
||||
throw e;
|
||||
|
@ -1012,9 +1012,9 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
|
|||
} catch (Error& err) {
|
||||
state Error e = err;
|
||||
TraceEvent("SnapDataDistributor_SnapReqExit")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("SnapPayload", snapReq.snapPayload)
|
||||
.detail("SnapUID", snapReq.snapUID)
|
||||
.error(e, true /*includeCancelled */);
|
||||
.detail("SnapUID", snapReq.snapUID);
|
||||
if (e.code() == error_code_snap_storage_failed || e.code() == error_code_snap_tlog_failed ||
|
||||
e.code() == error_code_operation_cancelled || e.code() == error_code_snap_disable_tlog_pop_failed) {
|
||||
// enable tlog pop on local tlog nodes
|
||||
|
@ -1072,9 +1072,9 @@ ACTOR Future<Void> ddSnapCreate(DistributorSnapRequest snapReq,
|
|||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent("SnapDDCreateError")
|
||||
.errorUnsuppressed(e)
|
||||
.detail("SnapPayload", snapReq.snapPayload)
|
||||
.detail("SnapUID", snapReq.snapUID)
|
||||
.error(e, true /*includeCancelled */);
|
||||
.detail("SnapUID", snapReq.snapUID);
|
||||
if (e.code() != error_code_operation_cancelled) {
|
||||
snapReq.reply.sendError(e);
|
||||
} else {
|
||||
|
@ -1251,10 +1251,10 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
|
|||
}
|
||||
} catch (Error& err) {
|
||||
if (normalDataDistributorErrors().count(err.code()) == 0) {
|
||||
TraceEvent("DataDistributorError", di.id()).error(err, true);
|
||||
TraceEvent("DataDistributorError", di.id()).errorUnsuppressed(err);
|
||||
throw err;
|
||||
}
|
||||
TraceEvent("DataDistributorDied", di.id()).error(err, true);
|
||||
TraceEvent("DataDistributorDied", di.id()).errorUnsuppressed(err);
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
|
|
@ -1265,10 +1265,12 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueueData* self, RelocateData rd,
|
|||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(relocateShardInterval.end(), distributorId).error(e, true).detail("Duration", now() - startTime);
|
||||
TraceEvent(relocateShardInterval.end(), distributorId)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Duration", now() - startTime);
|
||||
if (now() - startTime > 600) {
|
||||
TraceEvent(SevWarnAlways, "RelocateShardTooLong")
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Duration", now() - startTime)
|
||||
.detail("Dest", describe(destIds))
|
||||
.detail("Src", describe(rd.src));
|
||||
|
@ -1540,8 +1542,8 @@ ACTOR Future<Void> BgDDMountainChopper(DDQueueData* self, int teamCollectionInde
|
|||
traceEvent.detail("ResetCount", resetCount);
|
||||
tr.reset();
|
||||
} catch (Error& e) {
|
||||
traceEvent.error(
|
||||
e, true); // Log actor_cancelled because it's not legal to suppress an event that's initialized
|
||||
// Log actor_cancelled because it's not legal to suppress an event that's initialized
|
||||
traceEvent.errorUnsuppressed(e);
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
|
||||
|
@ -1655,8 +1657,8 @@ ACTOR Future<Void> BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex)
|
|||
traceEvent.detail("ResetCount", resetCount);
|
||||
tr.reset();
|
||||
} catch (Error& e) {
|
||||
traceEvent.error(
|
||||
e, true); // Log actor_cancelled because it's not legal to suppress an event that's initialized
|
||||
// Log actor_cancelled because it's not legal to suppress an event that's initialized
|
||||
traceEvent.errorUnsuppressed(e);
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
|
||||
|
|
|
@ -492,7 +492,9 @@ public:
|
|||
delete pageMem;
|
||||
TEST(true); // push error
|
||||
TEST(2 == syncFiles.size()); // push spanning both files error
|
||||
TraceEvent(SevError, "RDQPushAndCommitError", dbgid).error(e, true).detail("InitialFilename0", filename);
|
||||
TraceEvent(SevError, "RDQPushAndCommitError", dbgid)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("InitialFilename0", filename);
|
||||
|
||||
if (errorPromise.canBeSet())
|
||||
errorPromise.sendError(e);
|
||||
|
@ -612,7 +614,7 @@ public:
|
|||
.detail("File0", self->filename(0));
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "DiskQueueShutdownError", self->dbgid)
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Reason", e.code() == error_code_platform_error ? "could not delete database" : "unknown");
|
||||
error = e;
|
||||
}
|
||||
|
@ -731,7 +733,7 @@ public:
|
|||
} catch (Error& e) {
|
||||
bool ok = e.code() == error_code_file_not_found;
|
||||
TraceEvent(ok ? SevInfo : SevError, "RDQReadFirstAndLastPagesError", self->dbgid)
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("File0Name", self->files[0].dbgFilename);
|
||||
if (!self->error.isSet())
|
||||
self->error.sendError(e);
|
||||
|
@ -804,7 +806,7 @@ public:
|
|||
} catch (Error& e) {
|
||||
TEST(true); // Read next page error
|
||||
TraceEvent(SevError, "RDQReadNextPageError", self->dbgid)
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("File0Name", self->files[0].dbgFilename);
|
||||
if (!self->error.isSet())
|
||||
self->error.sendError(e);
|
||||
|
|
|
@ -58,7 +58,7 @@ ACTOR Future<Void> encryptKeyProxyServer(EncryptKeyProxyInterface ekpInterface,
|
|||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent("EKP_Terminated", ekpInterface.id()).error(e, true);
|
||||
TraceEvent("EKP_Terminated", ekpInterface.id()).errorUnsuppressed(e);
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
|
|
@ -991,7 +991,7 @@ ACTOR Future<Void> grvProxyServer(GrvProxyInterface proxy,
|
|||
state Future<Void> core = grvProxyServerCore(proxy, req.master, req.masterLifetime, db);
|
||||
wait(core || checkRemoved(db, req.recoveryCount, proxy));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("GrvProxyTerminated", proxy.id()).error(e, true);
|
||||
TraceEvent("GrvProxyTerminated", proxy.id()).errorUnsuppressed(e);
|
||||
|
||||
if (e.code() != error_code_worker_removed && e.code() != error_code_tlog_stopped &&
|
||||
e.code() != error_code_tlog_failed && e.code() != error_code_coordinators_changed &&
|
||||
|
|
|
@ -634,7 +634,7 @@ private:
|
|||
} catch (Error& e) {
|
||||
bool ok = e.code() == error_code_operation_cancelled || e.code() == error_code_file_not_found ||
|
||||
e.code() == error_code_disk_adapter_reset;
|
||||
TraceEvent(ok ? SevInfo : SevError, "ErrorDuringRecovery", dbgid).error(e, true);
|
||||
TraceEvent(ok ? SevInfo : SevError, "ErrorDuringRecovery", dbgid).errorUnsuppressed(e);
|
||||
if (e.code() != error_code_disk_adapter_reset) {
|
||||
throw e;
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
#include <rocksdb/version.h>
|
||||
#include <rocksdb/utilities/table_properties_collectors.h>
|
||||
#include <rocksdb/rate_limiter.h>
|
||||
#include <rocksdb/perf_context.h>
|
||||
#include <rocksdb/c.h>
|
||||
#if defined __has_include
|
||||
#if __has_include(<liburing.h>)
|
||||
#include <liburing.h>
|
||||
|
@ -312,6 +314,271 @@ private:
|
|||
uint64_t iteratorsReuseCount;
|
||||
};
|
||||
|
||||
class PerfContextMetrics {
|
||||
public:
|
||||
PerfContextMetrics();
|
||||
void reset();
|
||||
void set(int index);
|
||||
void log(bool ignoreZeroMetric);
|
||||
|
||||
private:
|
||||
std::vector<std::tuple<const char*, int, std::vector<uint64_t>>> metrics;
|
||||
uint64_t getRocksdbPerfcontextMetric(int metric);
|
||||
};
|
||||
|
||||
PerfContextMetrics::PerfContextMetrics() {
|
||||
metrics = {
|
||||
{ "UserKeyComparisonCount", rocksdb_user_key_comparison_count, {} },
|
||||
{ "BlockCacheHitCount", rocksdb_block_cache_hit_count, {} },
|
||||
{ "BlockReadCount", rocksdb_block_read_count, {} },
|
||||
{ "BlockReadByte", rocksdb_block_read_byte, {} },
|
||||
{ "BlockReadTime", rocksdb_block_read_time, {} },
|
||||
{ "BlockChecksumTime", rocksdb_block_checksum_time, {} },
|
||||
{ "BlockDecompressTime", rocksdb_block_decompress_time, {} },
|
||||
{ "GetReadBytes", rocksdb_get_read_bytes, {} },
|
||||
{ "MultigetReadBytes", rocksdb_multiget_read_bytes, {} },
|
||||
{ "IterReadBytes", rocksdb_iter_read_bytes, {} },
|
||||
{ "InternalKeySkippedCount", rocksdb_internal_key_skipped_count, {} },
|
||||
{ "InternalDeleteSkippedCount", rocksdb_internal_delete_skipped_count, {} },
|
||||
{ "InternalRecentSkippedCount", rocksdb_internal_recent_skipped_count, {} },
|
||||
{ "InternalMergeCount", rocksdb_internal_merge_count, {} },
|
||||
{ "GetSnapshotTime", rocksdb_get_snapshot_time, {} },
|
||||
{ "GetFromMemtableTime", rocksdb_get_from_memtable_time, {} },
|
||||
{ "GetFromMemtableCount", rocksdb_get_from_memtable_count, {} },
|
||||
{ "GetPostProcessTime", rocksdb_get_post_process_time, {} },
|
||||
{ "GetFromOutputFilesTime", rocksdb_get_from_output_files_time, {} },
|
||||
{ "SeekOnMemtableTime", rocksdb_seek_on_memtable_time, {} },
|
||||
{ "SeekOnMemtableCount", rocksdb_seek_on_memtable_count, {} },
|
||||
{ "NextOnMemtableCount", rocksdb_next_on_memtable_count, {} },
|
||||
{ "PrevOnMemtableCount", rocksdb_prev_on_memtable_count, {} },
|
||||
{ "SeekChildSeekTime", rocksdb_seek_child_seek_time, {} },
|
||||
{ "SeekChildSeekCount", rocksdb_seek_child_seek_count, {} },
|
||||
{ "SeekMinHeapTime", rocksdb_seek_min_heap_time, {} },
|
||||
{ "SeekMaxHeapTime", rocksdb_seek_max_heap_time, {} },
|
||||
{ "SeekInternalSeekTime", rocksdb_seek_internal_seek_time, {} },
|
||||
{ "FindNextUserEntryTime", rocksdb_find_next_user_entry_time, {} },
|
||||
{ "WriteWalTime", rocksdb_write_wal_time, {} },
|
||||
{ "WriteMemtableTime", rocksdb_write_memtable_time, {} },
|
||||
{ "WriteDelayTime", rocksdb_write_delay_time, {} },
|
||||
{ "WritePreAndPostProcessTime", rocksdb_write_pre_and_post_process_time, {} },
|
||||
{ "DbMutexLockNanos", rocksdb_db_mutex_lock_nanos, {} },
|
||||
{ "DbConditionWaitNanos", rocksdb_db_condition_wait_nanos, {} },
|
||||
{ "MergeOperatorTimeNanos", rocksdb_merge_operator_time_nanos, {} },
|
||||
{ "ReadIndexBlockNanos", rocksdb_read_index_block_nanos, {} },
|
||||
{ "ReadFilterBlockNanos", rocksdb_read_filter_block_nanos, {} },
|
||||
{ "NewTableBlockIterNanos", rocksdb_new_table_block_iter_nanos, {} },
|
||||
{ "NewTableIteratorNanos", rocksdb_new_table_iterator_nanos, {} },
|
||||
{ "BlockSeekNanos", rocksdb_block_seek_nanos, {} },
|
||||
{ "FindTableNanos", rocksdb_find_table_nanos, {} },
|
||||
{ "BloomMemtableHitCount", rocksdb_bloom_memtable_hit_count, {} },
|
||||
{ "BloomMemtableMissCount", rocksdb_bloom_memtable_miss_count, {} },
|
||||
{ "BloomSstHitCount", rocksdb_bloom_sst_hit_count, {} },
|
||||
{ "BloomSstMissCount", rocksdb_bloom_sst_miss_count, {} },
|
||||
{ "KeyLockWaitTime", rocksdb_key_lock_wait_time, {} },
|
||||
{ "KeyLockWaitCount", rocksdb_key_lock_wait_count, {} },
|
||||
{ "EnvNewSequentialFileNanos", rocksdb_env_new_sequential_file_nanos, {} },
|
||||
{ "EnvNewRandomAccessFileNanos", rocksdb_env_new_random_access_file_nanos, {} },
|
||||
{ "EnvNewWritableFileNanos", rocksdb_env_new_writable_file_nanos, {} },
|
||||
{ "EnvReuseWritableFileNanos", rocksdb_env_reuse_writable_file_nanos, {} },
|
||||
{ "EnvNewRandomRwFileNanos", rocksdb_env_new_random_rw_file_nanos, {} },
|
||||
{ "EnvNewDirectoryNanos", rocksdb_env_new_directory_nanos, {} },
|
||||
{ "EnvFileExistsNanos", rocksdb_env_file_exists_nanos, {} },
|
||||
{ "EnvGetChildrenNanos", rocksdb_env_get_children_nanos, {} },
|
||||
{ "EnvGetChildrenFileAttributesNanos", rocksdb_env_get_children_file_attributes_nanos, {} },
|
||||
{ "EnvDeleteFileNanos", rocksdb_env_delete_file_nanos, {} },
|
||||
{ "EnvCreateDirNanos", rocksdb_env_create_dir_nanos, {} },
|
||||
{ "EnvCreateDirIfMissingNanos", rocksdb_env_create_dir_if_missing_nanos, {} },
|
||||
{ "EnvDeleteDirNanos", rocksdb_env_delete_dir_nanos, {} },
|
||||
{ "EnvGetFileSizeNanos", rocksdb_env_get_file_size_nanos, {} },
|
||||
{ "EnvGetFileModificationTimeNanos", rocksdb_env_get_file_modification_time_nanos, {} },
|
||||
{ "EnvRenameFileNanos", rocksdb_env_rename_file_nanos, {} },
|
||||
{ "EnvLinkFileNanos", rocksdb_env_link_file_nanos, {} },
|
||||
{ "EnvLockFileNanos", rocksdb_env_lock_file_nanos, {} },
|
||||
{ "EnvUnlockFileNanos", rocksdb_env_unlock_file_nanos, {} },
|
||||
{ "EnvNewLoggerNanos", rocksdb_env_new_logger_nanos, {} },
|
||||
};
|
||||
for (auto& [name, metric, vals] : metrics) { // readers, then writer
|
||||
for (int i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; i++) {
|
||||
vals.push_back(0); // add reader
|
||||
}
|
||||
vals.push_back(0); // add writer
|
||||
}
|
||||
}
|
||||
|
||||
void PerfContextMetrics::reset() {
|
||||
rocksdb::get_perf_context()->Reset();
|
||||
}
|
||||
|
||||
void PerfContextMetrics::set(int index) {
|
||||
for (auto& [name, metric, vals] : metrics) {
|
||||
vals[index] = getRocksdbPerfcontextMetric(metric);
|
||||
}
|
||||
}
|
||||
|
||||
void PerfContextMetrics::log(bool ignoreZeroMetric) {
|
||||
TraceEvent e("RocksDBPerfContextMetrics");
|
||||
e.setMaxEventLength(20000);
|
||||
for (auto& [name, metric, vals] : metrics) {
|
||||
uint64_t s = 0;
|
||||
for (auto& v : vals) {
|
||||
s = s + v;
|
||||
}
|
||||
if (ignoreZeroMetric && s == 0)
|
||||
continue;
|
||||
e.detail("Sum" + (std::string)name, s);
|
||||
for (int i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; i++) {
|
||||
if (vals[i] != 0)
|
||||
e.detail("RD" + std::to_string(i) + name, vals[i]);
|
||||
}
|
||||
if (vals[SERVER_KNOBS->ROCKSDB_READ_PARALLELISM] != 0)
|
||||
e.detail("WR" + (std::string)name, vals[SERVER_KNOBS->ROCKSDB_READ_PARALLELISM]);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t PerfContextMetrics::getRocksdbPerfcontextMetric(int metric) {
|
||||
switch (metric) {
|
||||
case rocksdb_user_key_comparison_count:
|
||||
return rocksdb::get_perf_context()->user_key_comparison_count;
|
||||
case rocksdb_block_cache_hit_count:
|
||||
return rocksdb::get_perf_context()->block_cache_hit_count;
|
||||
case rocksdb_block_read_count:
|
||||
return rocksdb::get_perf_context()->block_read_count;
|
||||
case rocksdb_block_read_byte:
|
||||
return rocksdb::get_perf_context()->block_read_byte;
|
||||
case rocksdb_block_read_time:
|
||||
return rocksdb::get_perf_context()->block_read_time;
|
||||
case rocksdb_block_checksum_time:
|
||||
return rocksdb::get_perf_context()->block_checksum_time;
|
||||
case rocksdb_block_decompress_time:
|
||||
return rocksdb::get_perf_context()->block_decompress_time;
|
||||
case rocksdb_get_read_bytes:
|
||||
return rocksdb::get_perf_context()->get_read_bytes;
|
||||
case rocksdb_multiget_read_bytes:
|
||||
return rocksdb::get_perf_context()->multiget_read_bytes;
|
||||
case rocksdb_iter_read_bytes:
|
||||
return rocksdb::get_perf_context()->iter_read_bytes;
|
||||
case rocksdb_internal_key_skipped_count:
|
||||
return rocksdb::get_perf_context()->internal_key_skipped_count;
|
||||
case rocksdb_internal_delete_skipped_count:
|
||||
return rocksdb::get_perf_context()->internal_delete_skipped_count;
|
||||
case rocksdb_internal_recent_skipped_count:
|
||||
return rocksdb::get_perf_context()->internal_recent_skipped_count;
|
||||
case rocksdb_internal_merge_count:
|
||||
return rocksdb::get_perf_context()->internal_merge_count;
|
||||
case rocksdb_get_snapshot_time:
|
||||
return rocksdb::get_perf_context()->get_snapshot_time;
|
||||
case rocksdb_get_from_memtable_time:
|
||||
return rocksdb::get_perf_context()->get_from_memtable_time;
|
||||
case rocksdb_get_from_memtable_count:
|
||||
return rocksdb::get_perf_context()->get_from_memtable_count;
|
||||
case rocksdb_get_post_process_time:
|
||||
return rocksdb::get_perf_context()->get_post_process_time;
|
||||
case rocksdb_get_from_output_files_time:
|
||||
return rocksdb::get_perf_context()->get_from_output_files_time;
|
||||
case rocksdb_seek_on_memtable_time:
|
||||
return rocksdb::get_perf_context()->seek_on_memtable_time;
|
||||
case rocksdb_seek_on_memtable_count:
|
||||
return rocksdb::get_perf_context()->seek_on_memtable_count;
|
||||
case rocksdb_next_on_memtable_count:
|
||||
return rocksdb::get_perf_context()->next_on_memtable_count;
|
||||
case rocksdb_prev_on_memtable_count:
|
||||
return rocksdb::get_perf_context()->prev_on_memtable_count;
|
||||
case rocksdb_seek_child_seek_time:
|
||||
return rocksdb::get_perf_context()->seek_child_seek_time;
|
||||
case rocksdb_seek_child_seek_count:
|
||||
return rocksdb::get_perf_context()->seek_child_seek_count;
|
||||
case rocksdb_seek_min_heap_time:
|
||||
return rocksdb::get_perf_context()->seek_min_heap_time;
|
||||
case rocksdb_seek_max_heap_time:
|
||||
return rocksdb::get_perf_context()->seek_max_heap_time;
|
||||
case rocksdb_seek_internal_seek_time:
|
||||
return rocksdb::get_perf_context()->seek_internal_seek_time;
|
||||
case rocksdb_find_next_user_entry_time:
|
||||
return rocksdb::get_perf_context()->find_next_user_entry_time;
|
||||
case rocksdb_write_wal_time:
|
||||
return rocksdb::get_perf_context()->write_wal_time;
|
||||
case rocksdb_write_memtable_time:
|
||||
return rocksdb::get_perf_context()->write_memtable_time;
|
||||
case rocksdb_write_delay_time:
|
||||
return rocksdb::get_perf_context()->write_delay_time;
|
||||
case rocksdb_write_pre_and_post_process_time:
|
||||
return rocksdb::get_perf_context()->write_pre_and_post_process_time;
|
||||
case rocksdb_db_mutex_lock_nanos:
|
||||
return rocksdb::get_perf_context()->db_mutex_lock_nanos;
|
||||
case rocksdb_db_condition_wait_nanos:
|
||||
return rocksdb::get_perf_context()->db_condition_wait_nanos;
|
||||
case rocksdb_merge_operator_time_nanos:
|
||||
return rocksdb::get_perf_context()->merge_operator_time_nanos;
|
||||
case rocksdb_read_index_block_nanos:
|
||||
return rocksdb::get_perf_context()->read_index_block_nanos;
|
||||
case rocksdb_read_filter_block_nanos:
|
||||
return rocksdb::get_perf_context()->read_filter_block_nanos;
|
||||
case rocksdb_new_table_block_iter_nanos:
|
||||
return rocksdb::get_perf_context()->new_table_block_iter_nanos;
|
||||
case rocksdb_new_table_iterator_nanos:
|
||||
return rocksdb::get_perf_context()->new_table_iterator_nanos;
|
||||
case rocksdb_block_seek_nanos:
|
||||
return rocksdb::get_perf_context()->block_seek_nanos;
|
||||
case rocksdb_find_table_nanos:
|
||||
return rocksdb::get_perf_context()->find_table_nanos;
|
||||
case rocksdb_bloom_memtable_hit_count:
|
||||
return rocksdb::get_perf_context()->bloom_memtable_hit_count;
|
||||
case rocksdb_bloom_memtable_miss_count:
|
||||
return rocksdb::get_perf_context()->bloom_memtable_miss_count;
|
||||
case rocksdb_bloom_sst_hit_count:
|
||||
return rocksdb::get_perf_context()->bloom_sst_hit_count;
|
||||
case rocksdb_bloom_sst_miss_count:
|
||||
return rocksdb::get_perf_context()->bloom_sst_miss_count;
|
||||
case rocksdb_key_lock_wait_time:
|
||||
return rocksdb::get_perf_context()->key_lock_wait_time;
|
||||
case rocksdb_key_lock_wait_count:
|
||||
return rocksdb::get_perf_context()->key_lock_wait_count;
|
||||
case rocksdb_env_new_sequential_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_new_sequential_file_nanos;
|
||||
case rocksdb_env_new_random_access_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_new_random_access_file_nanos;
|
||||
case rocksdb_env_new_writable_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_new_writable_file_nanos;
|
||||
case rocksdb_env_reuse_writable_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_reuse_writable_file_nanos;
|
||||
case rocksdb_env_new_random_rw_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_new_random_rw_file_nanos;
|
||||
case rocksdb_env_new_directory_nanos:
|
||||
return rocksdb::get_perf_context()->env_new_directory_nanos;
|
||||
case rocksdb_env_file_exists_nanos:
|
||||
return rocksdb::get_perf_context()->env_file_exists_nanos;
|
||||
case rocksdb_env_get_children_nanos:
|
||||
return rocksdb::get_perf_context()->env_get_children_nanos;
|
||||
case rocksdb_env_get_children_file_attributes_nanos:
|
||||
return rocksdb::get_perf_context()->env_get_children_file_attributes_nanos;
|
||||
case rocksdb_env_delete_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_delete_file_nanos;
|
||||
case rocksdb_env_create_dir_nanos:
|
||||
return rocksdb::get_perf_context()->env_create_dir_nanos;
|
||||
case rocksdb_env_create_dir_if_missing_nanos:
|
||||
return rocksdb::get_perf_context()->env_create_dir_if_missing_nanos;
|
||||
case rocksdb_env_delete_dir_nanos:
|
||||
return rocksdb::get_perf_context()->env_delete_dir_nanos;
|
||||
case rocksdb_env_get_file_size_nanos:
|
||||
return rocksdb::get_perf_context()->env_get_file_size_nanos;
|
||||
case rocksdb_env_get_file_modification_time_nanos:
|
||||
return rocksdb::get_perf_context()->env_get_file_modification_time_nanos;
|
||||
case rocksdb_env_rename_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_rename_file_nanos;
|
||||
case rocksdb_env_link_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_link_file_nanos;
|
||||
case rocksdb_env_lock_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_lock_file_nanos;
|
||||
case rocksdb_env_unlock_file_nanos:
|
||||
return rocksdb::get_perf_context()->env_unlock_file_nanos;
|
||||
case rocksdb_env_new_logger_nanos:
|
||||
return rocksdb::get_perf_context()->env_new_logger_nanos;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> refreshReadIteratorPool(std::shared_ptr<ReadIteratorPool> readIterPool) {
|
||||
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
|
||||
loop {
|
||||
|
@ -336,6 +603,7 @@ ACTOR Future<Void> flowLockLogger(const FlowLock* readLock, const FlowLock* fetc
|
|||
}
|
||||
|
||||
ACTOR Future<Void> rocksDBMetricLogger(std::shared_ptr<rocksdb::Statistics> statistics,
|
||||
std::shared_ptr<PerfContextMetrics> perfContextMetrics,
|
||||
rocksdb::DB* db,
|
||||
std::shared_ptr<ReadIteratorPool> readIterPool) {
|
||||
state std::vector<std::tuple<const char*, uint32_t, uint64_t>> tickerStats = {
|
||||
|
@ -431,6 +699,10 @@ ACTOR Future<Void> rocksDBMetricLogger(std::shared_ptr<rocksdb::Statistics> stat
|
|||
stat = readIterPool->numTimesReadIteratorsReused();
|
||||
e.detail("NumTimesReadIteratorsReused", stat - readIteratorPoolStats["NumTimesReadIteratorsReused"]);
|
||||
readIteratorPoolStats["NumTimesReadIteratorsReused"] = stat;
|
||||
|
||||
if (SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE) {
|
||||
perfContextMetrics->log(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -458,6 +730,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
|
||||
struct Writer : IThreadPoolReceiver {
|
||||
DB& db;
|
||||
|
||||
UID id;
|
||||
std::shared_ptr<rocksdb::RateLimiter> rateLimiter;
|
||||
Reference<Histogram> commitLatencyHistogram;
|
||||
|
@ -466,9 +739,16 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
Reference<Histogram> writeHistogram;
|
||||
Reference<Histogram> deleteCompactRangeHistogram;
|
||||
std::shared_ptr<ReadIteratorPool> readIterPool;
|
||||
std::shared_ptr<PerfContextMetrics> perfContextMetrics;
|
||||
int threadIndex;
|
||||
|
||||
explicit Writer(DB& db, UID id, std::shared_ptr<ReadIteratorPool> readIterPool)
|
||||
: db(db), id(id), readIterPool(readIterPool),
|
||||
explicit Writer(DB& db,
|
||||
UID id,
|
||||
std::shared_ptr<ReadIteratorPool> readIterPool,
|
||||
std::shared_ptr<PerfContextMetrics> perfContextMetrics,
|
||||
int threadIndex)
|
||||
: db(db), id(id), readIterPool(readIterPool), perfContextMetrics(perfContextMetrics),
|
||||
threadIndex(threadIndex),
|
||||
rateLimiter(SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC > 0
|
||||
? rocksdb::NewGenericRateLimiter(
|
||||
SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC, // rate_bytes_per_sec
|
||||
|
@ -491,7 +771,13 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
Histogram::Unit::microseconds)),
|
||||
deleteCompactRangeHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP,
|
||||
ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM,
|
||||
Histogram::Unit::microseconds)) {}
|
||||
Histogram::Unit::microseconds)) {
|
||||
if (SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE) {
|
||||
// Enable perf context on the same thread with the db thread
|
||||
rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex);
|
||||
perfContextMetrics->reset();
|
||||
}
|
||||
}
|
||||
|
||||
~Writer() override {
|
||||
if (db) {
|
||||
|
@ -542,11 +828,11 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
// The current thread and main thread are same when the code runs in simulation.
|
||||
// blockUntilReady() is getting the thread into deadlock state, so directly calling
|
||||
// the metricsLogger.
|
||||
a.metrics = rocksDBMetricLogger(options.statistics, db, readIterPool) &&
|
||||
a.metrics = rocksDBMetricLogger(options.statistics, perfContextMetrics, db, readIterPool) &&
|
||||
flowLockLogger(a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool);
|
||||
} else {
|
||||
onMainThread([&] {
|
||||
a.metrics = rocksDBMetricLogger(options.statistics, db, readIterPool) &&
|
||||
a.metrics = rocksDBMetricLogger(options.statistics, perfContextMetrics, db, readIterPool) &&
|
||||
flowLockLogger(a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool);
|
||||
return Future<bool>(true);
|
||||
}).blockUntilReady();
|
||||
|
@ -586,6 +872,12 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
}
|
||||
};
|
||||
void action(CommitAction& a) {
|
||||
bool doPerfContextMetrics =
|
||||
SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE &&
|
||||
(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_PERFCONTEXT_SAMPLE_RATE);
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->reset();
|
||||
}
|
||||
double commitBeginTime;
|
||||
if (a.getHistograms) {
|
||||
commitBeginTime = timer_monotonic();
|
||||
|
@ -632,6 +924,9 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
commitActionHistogram->sampleSeconds(currTime - commitBeginTime);
|
||||
commitLatencyHistogram->sampleSeconds(currTime - a.startTime);
|
||||
}
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->set(threadIndex);
|
||||
}
|
||||
}
|
||||
|
||||
struct CloseAction : TypedAction<Writer, CloseAction> {
|
||||
|
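
Note: the doPerfContextMetrics logic threaded through the Writer and Reader actions in this file resets RocksDB's per-thread perf context before an operation and records it into that thread's slot afterwards, but only for a random fraction of operations chosen by ROCKSDB_PERFCONTEXT_SAMPLE_RATE, so the collection cost is paid on a sample rather than on every commit or read. A standalone sketch of the sampling idea, using plain timing instead of the RocksDB perf-context API:

#include <chrono>
#include <iostream>
#include <random>
#include <thread>

int main() {
    std::mt19937 rng{ std::random_device{}() };
    std::uniform_real_distribution<double> uniform(0.0, 1.0);
    const double sampleRate = 0.2; // analogous to ROCKSDB_PERFCONTEXT_SAMPLE_RATE

    int sampled = 0;
    double sampledMicros = 0.0;

    for (int op = 0; op < 100; ++op) {
        bool doMetrics = uniform(rng) < sampleRate; // decide up front, as the action() methods do
        auto start = std::chrono::steady_clock::now();

        std::this_thread::sleep_for(std::chrono::microseconds(50)); // stand-in for the real operation

        if (doMetrics) { // only sampled operations pay for collection
            auto us = std::chrono::duration<double, std::micro>(std::chrono::steady_clock::now() - start).count();
            ++sampled;
            sampledMicros += us;
        }
    }
    std::cout << "sampled " << sampled << "/100 ops, mean "
              << (sampled ? sampledMicros / sampled : 0.0) << " us\n";
}
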
@ -684,9 +979,14 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
Reference<Histogram> readValueGetHistogram;
|
||||
Reference<Histogram> readPrefixGetHistogram;
|
||||
std::shared_ptr<ReadIteratorPool> readIterPool;
|
||||
std::shared_ptr<PerfContextMetrics> perfContextMetrics;
|
||||
int threadIndex;
|
||||
|
||||
explicit Reader(DB& db, std::shared_ptr<ReadIteratorPool> readIterPool)
|
||||
: db(db), readIterPool(readIterPool),
|
||||
explicit Reader(DB& db,
|
||||
std::shared_ptr<ReadIteratorPool> readIterPool,
|
||||
std::shared_ptr<PerfContextMetrics> perfContextMetrics,
|
||||
int threadIndex)
|
||||
: db(db), readIterPool(readIterPool), perfContextMetrics(perfContextMetrics), threadIndex(threadIndex),
|
||||
readRangeLatencyHistogram(Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP,
|
||||
ROCKSDB_READRANGE_LATENCY_HISTOGRAM,
|
||||
Histogram::Unit::microseconds)),
|
||||
|
@ -734,6 +1034,11 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT;
|
||||
readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT;
|
||||
}
|
||||
if (SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE) {
|
||||
// Enable perf context on the same thread as the db thread
|
||||
rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex);
|
||||
perfContextMetrics->reset();
|
||||
}
|
||||
}
|
||||
|
||||
void init() override {}
|
||||
|
@ -752,6 +1057,12 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; }
|
||||
};
|
||||
void action(ReadValueAction& a) {
|
||||
bool doPerfContextMetrics =
|
||||
SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE &&
|
||||
(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_PERFCONTEXT_SAMPLE_RATE);
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->reset();
|
||||
}
|
||||
double readBeginTime = timer_monotonic();
|
||||
if (a.getHistograms) {
|
||||
readValueQueueWaitHistogram->sampleSeconds(readBeginTime - a.startTime);
|
||||
|
@ -801,6 +1112,9 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
readValueActionHistogram->sampleSeconds(currTime - readBeginTime);
|
||||
readValueLatencyHistogram->sampleSeconds(currTime - a.startTime);
|
||||
}
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->set(threadIndex);
|
||||
}
|
||||
}
|
||||
|
||||
struct ReadValuePrefixAction : TypedAction<Reader, ReadValuePrefixAction> {
|
||||
|
@ -818,6 +1132,12 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; }
|
||||
};
|
||||
void action(ReadValuePrefixAction& a) {
|
||||
bool doPerfContextMetrics =
|
||||
SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE &&
|
||||
(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_PERFCONTEXT_SAMPLE_RATE);
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->reset();
|
||||
}
|
||||
double readBeginTime = timer_monotonic();
|
||||
if (a.getHistograms) {
|
||||
readPrefixQueueWaitHistogram->sampleSeconds(readBeginTime - a.startTime);
|
||||
|
@ -871,6 +1191,9 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
readPrefixActionHistogram->sampleSeconds(currTime - readBeginTime);
|
||||
readPrefixLatencyHistogram->sampleSeconds(currTime - a.startTime);
|
||||
}
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->set(threadIndex);
|
||||
}
|
||||
}
|
||||
|
||||
struct ReadRangeAction : TypedAction<Reader, ReadRangeAction>, FastAllocated<ReadRangeAction> {
|
||||
|
@ -887,6 +1210,12 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
double getTimeEstimate() const override { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; }
|
||||
};
|
||||
void action(ReadRangeAction& a) {
|
||||
bool doPerfContextMetrics =
|
||||
SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE &&
|
||||
(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_PERFCONTEXT_SAMPLE_RATE);
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->reset();
|
||||
}
|
||||
double readBeginTime = timer_monotonic();
|
||||
if (a.getHistograms) {
|
||||
readRangeQueueWaitHistogram->sampleSeconds(readBeginTime - a.startTime);
|
||||
|
@ -983,10 +1312,14 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
readRangeActionHistogram->sampleSeconds(currTime - readBeginTime);
|
||||
readRangeLatencyHistogram->sampleSeconds(currTime - a.startTime);
|
||||
}
|
||||
if (doPerfContextMetrics) {
|
||||
perfContextMetrics->set(threadIndex);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
DB db = nullptr;
|
||||
std::shared_ptr<PerfContextMetrics> perfContextMetrics;
|
||||
std::string path;
|
||||
UID id;
|
||||
Reference<IThreadPool> writeThread;
|
||||
|
@ -1015,7 +1348,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
Counters counters;
|
||||
|
||||
explicit RocksDBKeyValueStore(const std::string& path, UID id)
|
||||
: path(path), id(id), readIterPool(new ReadIteratorPool(db, path)),
|
||||
: path(path), id(id), perfContextMetrics(new PerfContextMetrics()), readIterPool(new ReadIteratorPool(db, path)),
|
||||
readSemaphore(SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
|
||||
fetchSemaphore(SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_SOFT_MAX),
|
||||
numReadWaiters(SERVER_KNOBS->ROCKSDB_READ_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
|
||||
|
@ -1038,10 +1371,12 @@ struct RocksDBKeyValueStore : IKeyValueStore {
|
|||
writeThread = createGenericThreadPool();
|
||||
readThreads = createGenericThreadPool();
|
||||
}
|
||||
writeThread->addThread(new Writer(db, id, readIterPool), "fdb-rocksdb-wr");
|
||||
writeThread->addThread(
|
||||
new Writer(db, id, readIterPool, perfContextMetrics, SERVER_KNOBS->ROCKSDB_READ_PARALLELISM),
|
||||
"fdb-rocksdb-wr");
|
||||
TraceEvent("RocksDBReadThreads").detail("KnobRocksDBReadParallelism", SERVER_KNOBS->ROCKSDB_READ_PARALLELISM);
|
||||
for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) {
|
||||
readThreads->addThread(new Reader(db, readIterPool), "fdb-rocksdb-re");
|
||||
readThreads->addThread(new Reader(db, readIterPool, perfContextMetrics, i), "fdb-rocksdb-re");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
*/
|
||||
|
||||
#define SQLITE_THREADSAFE 0 // also in sqlite3.amalgamation.c!
|
||||
#include "contrib/fmt-8.0.1/include/fmt/format.h"
|
||||
#include "flow/crc32c.h"
|
||||
#include "fdbserver/IKeyValueStore.h"
|
||||
#include "fdbserver/CoroFlow.h"
|
||||
|
@ -2061,8 +2062,8 @@ private:
|
|||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "KVDoCloseError", self->logID)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Filename", self->filename)
|
||||
.error(e, true)
|
||||
.detail("Reason", e.code() == error_code_platform_error ? "could not delete database" : "unknown");
|
||||
error = e;
|
||||
}
|
||||
|
@ -2359,7 +2360,7 @@ ACTOR Future<Void> KVFileDump(std::string filename) {
|
|||
k = keyAfter(kv[kv.size() - 1].key);
|
||||
}
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "Counted: %ld\n", count);
|
||||
fmt::print(stderr, "Counted: {}\n", count);
|
||||
|
||||
if (store->getError().isError())
|
||||
wait(store->getError());
|
||||
|
|
|
@ -515,6 +515,8 @@ Future<Void> logRouterPeekMessages(PromiseType replyPromise,
|
|||
wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
|
||||
}
|
||||
|
||||
state double startTime = now();
|
||||
|
||||
Version poppedVer = poppedVersion(self, reqTag);
|
||||
|
||||
if (poppedVer > reqBegin || reqBegin < self->startVersion) {
|
||||
|
@ -535,8 +537,33 @@ Future<Void> logRouterPeekMessages(PromiseType replyPromise,
|
|||
return Void();
|
||||
}
|
||||
|
||||
Version endVersion = self->version.get() + 1;
peekMessagesFromMemory(self, reqTag, reqBegin, messages, endVersion);
state Version endVersion;
// Run the peek logic in a loop to account for the case where there is no data to return to the caller, and we may
// want to wait a little bit instead of just sending back an empty message. This feature is controlled by a knob.
loop {
	endVersion = self->version.get() + 1;
	peekMessagesFromMemory(self, reqTag, reqBegin, messages, endVersion);

	// Reply to the peek request when
	// - there is data to return to the caller, or
	// - batching of empty peeks is disabled, or
	// - the empty-peek batching interval has been reached.
	if (messages.getLength() > 0 || !SERVER_KNOBS->PEEK_BATCHING_EMPTY_MSG ||
	    now() - startTime > SERVER_KNOBS->PEEK_BATCHING_EMPTY_MSG_INTERVAL) {
		break;
	}

	state Version waitUntilVersion = self->version.get() + 1;

	// Currently, everything from `reqBegin` to self->version is an empty peek. Wait for more versions, or until
	// the empty-peek batching interval has expired.
	wait(self->version.whenAtLeast(waitUntilVersion) ||
	     delay(SERVER_KNOBS->PEEK_BATCHING_EMPTY_MSG_INTERVAL - (now() - startTime)));
	if (self->version.get() < waitUntilVersion) {
		break; // We know that everything from `reqBegin` to self->version is an empty message. Skip re-executing
		       // the peek logic.
	}
}
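// In effect, an otherwise-empty peek is held for up to PEEK_BATCHING_EMPTY_MSG_INTERVAL waiting for
// new versions before an empty reply is sent, trading a small amount of latency for fewer empty
// replies; the behavior is gated by the PEEK_BATCHING_EMPTY_MSG knob.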
|
||||
|
||||
TLogPeekReply reply;
|
||||
reply.maxKnownVersion = self->version.get();
|
||||
|
@ -600,8 +627,8 @@ ACTOR Future<Void> logRouterPeekStream(LogRouterData* self, TLogPeekStreamReques
|
|||
} catch (Error& e) {
|
||||
self->activePeekStreams--;
|
||||
TraceEvent(SevDebug, "TLogPeekStreamEnd", self->dbgid)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress());
|
||||
|
||||
if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
|
||||
req.reply.sendError(e);
|
||||
|
@ -737,7 +764,7 @@ ACTOR Future<Void> logRouter(TLogInterface interf,
|
|||
}
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_actor_cancelled || e.code() == error_code_worker_removed) {
|
||||
TraceEvent("LogRouterTerminated", interf.id()).error(e, true);
|
||||
TraceEvent("LogRouterTerminated", interf.id()).errorUnsuppressed(e);
|
||||
return Void();
|
||||
}
|
||||
throw;
|
||||
|
|
|
@ -367,7 +367,7 @@ ACTOR Future<Void> serverPeekStreamGetMore(ILogSystem::ServerPeekCursor* self, T
|
|||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
DisabledTraceEvent(SevDebug, "SPC_GetMoreB_Error", self->randomID).error(e, true);
|
||||
DisabledTraceEvent(SevDebug, "SPC_GetMoreB_Error", self->randomID).errorUnsuppressed(e);
|
||||
if (e.code() == error_code_connection_failed || e.code() == error_code_operation_obsolete) {
|
||||
// NOTE: delay in order to avoid the endless retry loop block other tasks
|
||||
self->peekReplyStream.reset();
|
||||
|
|
|
@ -558,7 +558,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
.detail("Shards", shards)
|
||||
.detail("MaxRetries", maxRetries);
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevDebug, interval.end(), relocationIntervalId).error(e, true);
|
||||
TraceEvent(SevDebug, interval.end(), relocationIntervalId).errorUnsuppressed(e);
|
||||
throw;
|
||||
}
|
||||
|
||||
|
@ -992,7 +992,7 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
|||
|
||||
TraceEvent(SevDebug, interval.end(), relocationIntervalId);
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevDebug, interval.end(), relocationIntervalId).error(e, true);
|
||||
TraceEvent(SevDebug, interval.end(), relocationIntervalId).errorUnsuppressed(e);
|
||||
throw;
|
||||
}
|
||||
return Void();
|
||||
|
@ -1151,7 +1151,7 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe
|
|||
tr->addReadConflictRange(conflictRange);
|
||||
tr->addWriteConflictRange(conflictRange);
|
||||
|
||||
StorageMetadataType metadata(timer_int());
|
||||
StorageMetadataType metadata(StorageMetadataType::currentTime());
|
||||
metadataMap.set(tr, server.id(), metadata);
|
||||
|
||||
if (SERVER_KNOBS->TSS_HACK_IDENTITY_MAPPING) {
|
||||
|
@ -1521,7 +1521,7 @@ void seedShardServers(Arena& arena, CommitTransactionRef& tr, std::vector<Storag
|
|||
tr.read_conflict_ranges.push_back_deep(arena, allKeys);
|
||||
KeyBackedObjectMap<UID, StorageMetadataType, decltype(IncludeVersion())> metadataMap(serverMetadataKeys.begin,
|
||||
IncludeVersion());
|
||||
StorageMetadataType metadata(timer_int());
|
||||
StorageMetadataType metadata(StorageMetadataType::currentTime());
|
||||
|
||||
for (auto& s : servers) {
|
||||
tr.set(arena, serverTagKeyFor(s.id()), serverTagValue(server_tag[s.id()]));
|
||||
|
|
|
@ -1161,8 +1161,8 @@ ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Ref
|
|||
} catch (Error& e) {
|
||||
self->activePeekStreams--;
|
||||
TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress());
|
||||
|
||||
if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
|
||||
req.reply.sendError(e);
|
||||
|
@ -1646,7 +1646,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
|
|||
wait(error);
|
||||
throw internal_error();
|
||||
} catch (Error& e) {
|
||||
TraceEvent("TLogError", tlogId).error(e, true);
|
||||
TraceEvent("TLogError", tlogId).errorUnsuppressed(e);
|
||||
|
||||
for (auto& it : self.id_data) {
|
||||
if (it.second->recoverySuccessful.canBeSet()) {
|
||||
|
|
|
@ -1479,8 +1479,8 @@ ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Ref
|
|||
} catch (Error& e) {
|
||||
self->activePeekStreams--;
|
||||
TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress());
|
||||
|
||||
if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
|
||||
req.reply.sendError(e);
|
||||
|
@ -1912,7 +1912,7 @@ ACTOR Future<Void> tLogSnapCreate(TLogSnapRequest snapReq, TLogData* self, Refer
|
|||
}
|
||||
snapReq.reply.send(Void());
|
||||
} catch (Error& e) {
|
||||
TraceEvent("TLogSnapCreateError").error(e, true /*includeCancelled */);
|
||||
TraceEvent("TLogSnapCreateError").errorUnsuppressed(e);
|
||||
if (e.code() != error_code_operation_cancelled) {
|
||||
snapReq.reply.sendError(e);
|
||||
} else {
|
||||
|
@ -2555,7 +2555,7 @@ bool tlogTerminated(TLogData* self, IKeyValueStore* persistentData, TLogQueue* p
|
|||
|
||||
if (e.code() == error_code_worker_removed || e.code() == error_code_recruitment_failed ||
|
||||
e.code() == error_code_file_not_found) {
|
||||
TraceEvent("TLogTerminated", self->dbgid).error(e, true);
|
||||
TraceEvent("TLogTerminated", self->dbgid).errorUnsuppressed(e);
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
|
@ -2848,7 +2848,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
|
|||
}
|
||||
} catch (Error& e) {
|
||||
self.terminated.send(Void());
|
||||
TraceEvent("TLogError", tlogId).error(e, true);
|
||||
TraceEvent("TLogError", tlogId).errorUnsuppressed(e);
|
||||
if (recovered.canBeSet())
|
||||
recovered.send(Void());
|
||||
|
||||
|
|
|
@ -1908,8 +1908,8 @@ ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Ref
|
|||
} catch (Error& e) {
|
||||
self->activePeekStreams--;
|
||||
TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress());
|
||||
|
||||
if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
|
||||
req.reply.sendError(e);
|
||||
|
@ -2357,7 +2357,7 @@ ACTOR Future<Void> tLogSnapCreate(TLogSnapRequest snapReq, TLogData* self, Refer
|
|||
}
|
||||
snapReq.reply.send(Void());
|
||||
} catch (Error& e) {
|
||||
TraceEvent("TLogExecHelperError").error(e, true /*includeCancelled */);
|
||||
TraceEvent("TLogExecHelperError").errorUnsuppressed(e);
|
||||
if (e.code() != error_code_operation_cancelled) {
|
||||
snapReq.reply.sendError(e);
|
||||
} else {
|
||||
|
@ -3038,7 +3038,7 @@ bool tlogTerminated(TLogData* self, IKeyValueStore* persistentData, TLogQueue* p
|
|||
|
||||
if (e.code() == error_code_worker_removed || e.code() == error_code_recruitment_failed ||
|
||||
e.code() == error_code_file_not_found) {
|
||||
TraceEvent("TLogTerminated", self->dbgid).error(e, true);
|
||||
TraceEvent("TLogTerminated", self->dbgid).errorUnsuppressed(e);
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
|
@ -3336,7 +3336,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
|
|||
}
|
||||
} catch (Error& e) {
|
||||
self.terminated.send(Void());
|
||||
TraceEvent("TLogError", tlogId).error(e, true);
|
||||
TraceEvent("TLogError", tlogId).errorUnsuppressed(e);
|
||||
if (recovered.canBeSet())
|
||||
recovered.send(Void());
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@ struct ProxyStats {
|
|||
id,
|
||||
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
|
||||
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
|
||||
commitLatencyBands("CommitLatencyMetrics", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY),
|
||||
commitLatencyBands("CommitLatencyBands", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY),
|
||||
commitBatchingEmptyMessageRatio("CommitBatchingEmptyMessageRatio",
|
||||
id,
|
||||
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
|
||||
|
|
|
@ -158,8 +158,9 @@ ACTOR Future<std::vector<WorkerInterface>> getCoordWorkers(Database cx,
|
|||
if (!coordinators.present()) {
|
||||
throw operation_failed();
|
||||
}
|
||||
std::vector<NetworkAddress> coordinatorsAddr =
|
||||
ClusterConnectionString(coordinators.get().toString()).coordinators();
|
||||
state ClusterConnectionString ccs(coordinators.get().toString());
|
||||
wait(ccs.resolveHostnames());
|
||||
std::vector<NetworkAddress> coordinatorsAddr = ccs.coordinators();
|
||||
std::set<NetworkAddress> coordinatorsAddrSet;
|
||||
for (const auto& addr : coordinatorsAddr) {
|
||||
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);
|
||||
|
@ -731,7 +732,7 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
|
|||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(("QuietDatabase" + phase + "Error").c_str()).error(e, true);
|
||||
TraceEvent(("QuietDatabase" + phase + "Error").c_str()).errorUnsuppressed(e);
|
||||
if (e.code() != error_code_actor_cancelled && e.code() != error_code_attribute_not_found &&
|
||||
e.code() != error_code_timed_out)
|
||||
TraceEvent(("QuietDatabase" + phase + "Error").c_str()).error(e);
|
||||
|
|
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Ratekeeper.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "fdbclient/DatabaseConfiguration.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/TagThrottle.actor.h"
|
||||
#include "fdbrpc/Smoother.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/RatekeeperInterface.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
#include "fdbserver/TLogInterface.h"
|
||||
|
||||
enum limitReason_t {
|
||||
unlimited, // TODO: rename to workload?
|
||||
storage_server_write_queue_size, // 1
|
||||
storage_server_write_bandwidth_mvcc,
|
||||
storage_server_readable_behind,
|
||||
log_server_mvcc_write_bandwidth,
|
||||
log_server_write_queue, // 5
|
||||
storage_server_min_free_space, // a storage server's normal limits are being reduced by low free space
|
||||
storage_server_min_free_space_ratio, // a storage server's normal limits are being reduced by a low free space ratio
|
||||
log_server_min_free_space,
|
||||
log_server_min_free_space_ratio,
|
||||
storage_server_durability_lag, // 10
|
||||
storage_server_list_fetch_failed,
|
||||
limitReason_t_end
|
||||
};
|
||||
|
||||
struct StorageQueueInfo {
|
||||
bool valid;
|
||||
UID id;
|
||||
LocalityData locality;
|
||||
StorageQueuingMetricsReply lastReply;
|
||||
StorageQueuingMetricsReply prevReply;
|
||||
Smoother smoothDurableBytes, smoothInputBytes, verySmoothDurableBytes;
|
||||
Smoother smoothDurableVersion, smoothLatestVersion;
|
||||
Smoother smoothFreeSpace;
|
||||
Smoother smoothTotalSpace;
|
||||
limitReason_t limitReason;
|
||||
|
||||
Optional<TransactionTag> busiestReadTag, busiestWriteTag;
|
||||
double busiestReadTagFractionalBusyness = 0, busiestWriteTagFractionalBusyness = 0;
|
||||
double busiestReadTagRate = 0, busiestWriteTagRate = 0;
|
||||
|
||||
Reference<EventCacheHolder> busiestWriteTagEventHolder;
|
||||
|
||||
// refresh periodically
|
||||
TransactionTagMap<TransactionCommitCostEstimation> tagCostEst;
|
||||
uint64_t totalWriteCosts = 0;
|
||||
int totalWriteOps = 0;
|
||||
|
||||
StorageQueueInfo(UID id, LocalityData locality)
|
||||
: valid(false), id(id), locality(locality), smoothDurableBytes(SERVER_KNOBS->SMOOTHING_AMOUNT),
|
||||
smoothInputBytes(SERVER_KNOBS->SMOOTHING_AMOUNT), verySmoothDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
|
||||
smoothDurableVersion(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothLatestVersion(SERVER_KNOBS->SMOOTHING_AMOUNT),
|
||||
smoothFreeSpace(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothTotalSpace(SERVER_KNOBS->SMOOTHING_AMOUNT),
|
||||
limitReason(limitReason_t::unlimited),
|
||||
busiestWriteTagEventHolder(makeReference<EventCacheHolder>(id.toString() + "/BusiestWriteTag")) {
|
||||
// FIXME: this is a tacky workaround for a potential uninitialized use in trackStorageServerQueueInfo
|
||||
lastReply.instanceID = -1;
|
||||
}
|
||||
};
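// Note (assumption based on the knob names, not stated in the change itself): the Smoother members
// above appear to apply exponential smoothing using SMOOTHING_AMOUNT / SLOW_SMOOTHING_AMOUNT as their
// time windows, so verySmoothDurableBytes reacts more slowly to changes than smoothDurableBytes.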
|
||||
|
||||
struct TLogQueueInfo {
|
||||
bool valid;
|
||||
UID id;
|
||||
TLogQueuingMetricsReply lastReply;
|
||||
TLogQueuingMetricsReply prevReply;
|
||||
Smoother smoothDurableBytes, smoothInputBytes, verySmoothDurableBytes;
|
||||
Smoother smoothFreeSpace;
|
||||
Smoother smoothTotalSpace;
|
||||
TLogQueueInfo(UID id)
|
||||
: valid(false), id(id), smoothDurableBytes(SERVER_KNOBS->SMOOTHING_AMOUNT),
|
||||
smoothInputBytes(SERVER_KNOBS->SMOOTHING_AMOUNT), verySmoothDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
|
||||
smoothFreeSpace(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothTotalSpace(SERVER_KNOBS->SMOOTHING_AMOUNT) {
|
||||
// FIXME: this is a tacky workaround for a potential uninitialized use in trackTLogQueueInfo (copied from
|
||||
// StorageQueueInfo)
|
||||
lastReply.instanceID = -1;
|
||||
}
|
||||
};
|
||||
|
||||
struct RatekeeperLimits {
|
||||
double tpsLimit;
|
||||
Int64MetricHandle tpsLimitMetric;
|
||||
Int64MetricHandle reasonMetric;
|
||||
|
||||
int64_t storageTargetBytes;
|
||||
int64_t storageSpringBytes;
|
||||
int64_t logTargetBytes;
|
||||
int64_t logSpringBytes;
|
||||
double maxVersionDifference;
|
||||
|
||||
int64_t durabilityLagTargetVersions;
|
||||
int64_t lastDurabilityLag;
|
||||
double durabilityLagLimit;
|
||||
|
||||
TransactionPriority priority;
|
||||
std::string context;
|
||||
|
||||
Reference<EventCacheHolder> rkUpdateEventCacheHolder;
|
||||
|
||||
RatekeeperLimits(TransactionPriority priority,
|
||||
std::string context,
|
||||
int64_t storageTargetBytes,
|
||||
int64_t storageSpringBytes,
|
||||
int64_t logTargetBytes,
|
||||
int64_t logSpringBytes,
|
||||
double maxVersionDifference,
|
||||
int64_t durabilityLagTargetVersions)
|
||||
: tpsLimit(std::numeric_limits<double>::infinity()), tpsLimitMetric(StringRef("Ratekeeper.TPSLimit" + context)),
|
||||
reasonMetric(StringRef("Ratekeeper.Reason" + context)), storageTargetBytes(storageTargetBytes),
|
||||
storageSpringBytes(storageSpringBytes), logTargetBytes(logTargetBytes), logSpringBytes(logSpringBytes),
|
||||
maxVersionDifference(maxVersionDifference),
|
||||
durabilityLagTargetVersions(
|
||||
durabilityLagTargetVersions +
|
||||
SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS), // The read transaction life versions are expected to not
|
||||
// be durable on the storage servers
|
||||
lastDurabilityLag(0), durabilityLagLimit(std::numeric_limits<double>::infinity()), priority(priority),
|
||||
context(context), rkUpdateEventCacheHolder(makeReference<EventCacheHolder>("RkUpdate" + context)) {}
|
||||
};
|
||||
|
||||
class Ratekeeper {
|
||||
friend class RatekeeperImpl;
|
||||
|
||||
// Differentiate from GrvProxyInfo in DatabaseContext.h
|
||||
struct GrvProxyInfo {
|
||||
int64_t totalTransactions;
|
||||
int64_t batchTransactions;
|
||||
uint64_t lastThrottledTagChangeId;
|
||||
|
||||
double lastUpdateTime;
|
||||
double lastTagPushTime;
|
||||
|
||||
GrvProxyInfo()
|
||||
: totalTransactions(0), batchTransactions(0), lastThrottledTagChangeId(0), lastUpdateTime(0),
|
||||
lastTagPushTime(0) {}
|
||||
};
|
||||
|
||||
UID id;
|
||||
Database db;
|
||||
|
||||
Map<UID, StorageQueueInfo> storageQueueInfo;
|
||||
Map<UID, TLogQueueInfo> tlogQueueInfo;
|
||||
|
||||
std::map<UID, Ratekeeper::GrvProxyInfo> grvProxyInfo;
|
||||
Smoother smoothReleasedTransactions, smoothBatchReleasedTransactions, smoothTotalDurableBytes;
|
||||
HealthMetrics healthMetrics;
|
||||
DatabaseConfiguration configuration;
|
||||
PromiseStream<Future<Void>> addActor;
|
||||
|
||||
Int64MetricHandle actualTpsMetric;
|
||||
|
||||
double lastWarning;
|
||||
double lastSSListFetchedTimestamp;
|
||||
|
||||
std::unique_ptr<class TagThrottler> tagThrottler;
|
||||
|
||||
RatekeeperLimits normalLimits;
|
||||
RatekeeperLimits batchLimits;
|
||||
|
||||
Deque<double> actualTpsHistory;
|
||||
Optional<Key> remoteDC;
|
||||
|
||||
Future<Void> expiredTagThrottleCleanup;
|
||||
|
||||
double lastBusiestCommitTagPick;
|
||||
|
||||
Ratekeeper(UID id, Database db);
|
||||
|
||||
Future<Void> configurationMonitor();
|
||||
void updateCommitCostEstimation(UIDTransactionTagMap<TransactionCommitCostEstimation> const& costEstimation);
|
||||
void updateRate(RatekeeperLimits* limits);
|
||||
Future<Void> refreshStorageServerCommitCost();
|
||||
Future<Void> monitorServerListChange(PromiseStream<std::pair<UID, Optional<StorageServerInterface>>> serverChanges);
|
||||
Future<Void> trackEachStorageServer(FutureStream<std::pair<UID, Optional<StorageServerInterface>>> serverChanges);
|
||||
|
||||
// SOMEDAY: template trackStorageServerQueueInfo and trackTLogQueueInfo into one function
|
||||
Future<Void> trackStorageServerQueueInfo(StorageServerInterface);
|
||||
Future<Void> trackTLogQueueInfo(TLogInterface);
|
||||
|
||||
void tryAutoThrottleTag(TransactionTag, double rate, double busyness, TagThrottledReason);
|
||||
void tryAutoThrottleTag(StorageQueueInfo&, int64_t storageQueue, int64_t storageDurabilityLag);
|
||||
Future<Void> monitorThrottlingChanges();
|
||||
|
||||
public:
|
||||
static Future<Void> run(RatekeeperInterface rkInterf, Reference<AsyncVar<ServerDBInfo> const> dbInfo);
|
||||
};
|
|
@ -373,7 +373,7 @@ ACTOR Future<Void> resolver(ResolverInterface resolver,
|
|||
}
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_actor_cancelled || e.code() == error_code_worker_removed) {
|
||||
TraceEvent("ResolverTerminated", resolver.id()).error(e, true);
|
||||
TraceEvent("ResolverTerminated", resolver.id()).errorUnsuppressed(e);
|
||||
return Void();
|
||||
}
|
||||
throw;
|
||||
|
|
|
@ -98,8 +98,8 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
|
|||
} catch (Error& e) {
|
||||
bool isError = e.code() != error_code_operation_cancelled;
|
||||
TraceEvent(isError ? SevError : SevWarnAlways, "FastRestoreApplierError", self->id())
|
||||
.detail("RequestType", requestTypeStr)
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("RequestType", requestTypeStr);
|
||||
actors.clear(false);
|
||||
break;
|
||||
}
|
||||
|
@ -251,9 +251,9 @@ ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRange
|
|||
retries++;
|
||||
if (retries > SERVER_KNOBS->FASTRESTORE_TXN_RETRY_MAX) {
|
||||
TraceEvent(SevWarnAlways, "RestoreApplierApplyClearRangeMutationsStuck", applierID)
|
||||
.error(e)
|
||||
.detail("BatchIndex", batchIndex)
|
||||
.detail("ClearRanges", ranges.size())
|
||||
.error(e);
|
||||
.detail("ClearRanges", ranges.size());
|
||||
}
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
|
@ -314,11 +314,13 @@ ACTOR static Future<Void> getAndComputeStagingKeys(
|
|||
} catch (Error& e) {
|
||||
cc->fetchTxnRetries += 1;
|
||||
if (retries++ > incompleteStagingKeys.size()) {
|
||||
TraceEvent(SevWarnAlways, "GetAndComputeStagingKeys", applierID)
|
||||
.suppressFor(1.0)
|
||||
.detail("RandomUID", randomID)
|
||||
.detail("BatchIndex", batchIndex)
|
||||
.error(e);
|
||||
if (e.code() != error_code_actor_cancelled) {
|
||||
TraceEvent(SevWarnAlways, "GetAndComputeStagingKeys", applierID)
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("RandomUID", randomID)
|
||||
.detail("BatchIndex", batchIndex);
|
||||
}
|
||||
}
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
|
|
|
@ -136,7 +136,7 @@ ACTOR Future<Void> startRestoreController(Reference<RestoreWorkerData> controlle
|
|||
wait(startProcessRestoreRequests(self, cx) || error);
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_operation_cancelled) {
|
||||
TraceEvent(SevError, "FastRestoreControllerStart").detail("Reason", "Unexpected unhandled error").error(e);
|
||||
TraceEvent(SevError, "FastRestoreControllerStart").error(e).detail("Reason", "Unexpected unhandled error");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -224,7 +224,7 @@ ACTOR Future<Void> dispatchRequests(Reference<RestoreLoaderData> self) {
|
|||
}
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_actor_cancelled) {
|
||||
TraceEvent(SevError, "FastRestoreLoaderDispatchRequests").error(e, true);
|
||||
TraceEvent(SevError, "FastRestoreLoaderDispatchRequests").errorUnsuppressed(e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
@ -301,8 +301,8 @@ ACTOR Future<Void> restoreLoaderCore(RestoreLoaderInterface loaderInterf,
|
|||
} catch (Error& e) {
|
||||
bool isError = e.code() != error_code_operation_cancelled; // == error_code_broken_promise
|
||||
TraceEvent(isError ? SevError : SevWarnAlways, "FastRestoreLoaderError", self->id())
|
||||
.detail("RequestType", requestTypeStr)
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("RequestType", requestTypeStr);
|
||||
actors.clear(false);
|
||||
break;
|
||||
}
|
||||
|
@ -513,8 +513,8 @@ ACTOR static Future<Void> parsePartitionedLogFileOnLoader(
|
|||
e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) {
|
||||
// blob http request failure, retry
|
||||
TraceEvent(SevWarnAlways, "FastRestoreDecodedPartitionedLogFileConnectionFailure")
|
||||
.detail("Retries", ++readFileRetries)
|
||||
.error(e);
|
||||
.error(e)
|
||||
.detail("Retries", ++readFileRetries);
|
||||
wait(delayJittered(0.1));
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestoreParsePartitionedLogFileOnLoaderUnexpectedError").error(e);
|
||||
|
@ -659,10 +659,10 @@ ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<R
|
|||
} catch (Error& e) { // In case ci.samples throws broken_promise due to unstable network
|
||||
if (e.code() == error_code_broken_promise || e.code() == error_code_operation_cancelled) {
|
||||
TraceEvent(SevWarnAlways, "FastRestoreLoaderPhaseLoadFileSendSamples")
|
||||
.detail("SamplesMessages", samplesMessages)
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("SamplesMessages", samplesMessages);
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestoreLoaderPhaseLoadFileSendSamplesUnexpectedError").error(e, true);
|
||||
TraceEvent(SevError, "FastRestoreLoaderPhaseLoadFileSendSamplesUnexpectedError").errorUnsuppressed(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1230,8 +1230,8 @@ ACTOR static Future<Void> _parseRangeFileToMutationsOnLoader(
|
|||
e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) {
|
||||
// blob http request failure, retry
|
||||
TraceEvent(SevWarnAlways, "FastRestoreDecodedRangeFileConnectionFailure")
|
||||
.detail("Retries", ++readFileRetries)
|
||||
.error(e);
|
||||
.error(e)
|
||||
.detail("Retries", ++readFileRetries);
|
||||
wait(delayJittered(0.1));
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestoreParseRangeFileOnLoaderUnexpectedError").error(e);
|
||||
|
@ -1355,8 +1355,8 @@ ACTOR static Future<Void> parseLogFileToMutationsOnLoader(NotifiedVersion* pProc
|
|||
e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) {
|
||||
// blob http request failure, retry
|
||||
TraceEvent(SevWarnAlways, "FastRestoreDecodedLogFileConnectionFailure")
|
||||
.detail("Retries", ++readFileRetries)
|
||||
.error(e);
|
||||
.error(e)
|
||||
.detail("Retries", ++readFileRetries);
|
||||
wait(delayJittered(0.1));
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestoreParseLogFileToMutationsOnLoaderUnexpectedError").error(e);
|
||||
|
|
|
@ -264,7 +264,7 @@ ACTOR Future<Void> startRestoreWorker(Reference<RestoreWorkerData> self, Restore
|
|||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarn, "FastRestoreWorkerError").detail("RequestType", requestTypeStr).error(e, true);
|
||||
TraceEvent(SevWarn, "FastRestoreWorkerError").errorUnsuppressed(e).detail("RequestType", requestTypeStr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -601,7 +601,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||
? SevInfo
|
||||
: SevError,
|
||||
"SimulatedFDBDTerminated")
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("ZoneId", localities.zoneId());
|
||||
}
|
||||
|
||||
|
@ -617,7 +617,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||
onShutdown = ISimulator::InjectFaults;
|
||||
} catch (Error& e) {
|
||||
TraceEvent(destructed ? SevInfo : SevError, "SimulatedFDBDRebooterError")
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("ZoneId", localities.zoneId())
|
||||
.detail("RandomId", randomId);
|
||||
onShutdown = e;
|
||||
|
@ -1905,8 +1905,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
|||
TEST(useIPv6); // Use IPv6
|
||||
TEST(!useIPv6); // Use IPv4
|
||||
|
||||
// TODO(renxuan): Use hostname 25% of the time, unless it is disabled
|
||||
bool useHostname = false; // !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
|
||||
// Use hostname 25% of the time, unless it is disabled
|
||||
bool useHostname = !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
|
||||
TEST(useHostname); // Use hostname
|
||||
TEST(!useHostname); // Use IP address
|
||||
NetworkAddressFromHostname fromHostname =
|
||||
|
|
|
@ -1038,7 +1038,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
|
|||
if (ssLag[address] >= 60) {
|
||||
messages.push_back(JsonString::makeMessage(
|
||||
"storage_server_lagging",
|
||||
format("Storage server lagging by %ld seconds.", (int64_t)ssLag[address]).c_str()));
|
||||
format("Storage server lagging by %lld seconds.", (int64_t)ssLag[address]).c_str()));
|
||||
}
|
||||
|
||||
// Store the message array into the status object that represents the worker process
|
||||
|
|
|
@ -1375,7 +1375,7 @@ ACTOR Future<Void> fetchKeys(StorageCacheData* data, AddingCacheRange* cacheRang
|
|||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("SCFKBlockFail", data->thisServerID)
|
||||
.error(e, true)
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0)
|
||||
.detail("FKID", interval.pairID);
|
||||
if (e.code() == error_code_transaction_too_old) {
|
||||
|
@ -1507,7 +1507,7 @@ ACTOR Future<Void> fetchKeys(StorageCacheData* data, AddingCacheRange* cacheRang
|
|||
|
||||
// TraceEvent(SevDebug, interval.end(), data->thisServerID);
|
||||
} catch (Error& e) {
|
||||
// TraceEvent(SevDebug, interval.end(), data->thisServerID).error(e, true).detail("Version", data->version.get());
|
||||
// TraceEvent(SevDebug, interval.end(), data->thisServerID).errorUnsuppressed(e).detail("Version", data->version.get());
|
||||
|
||||
// TODO define the shuttingDown state of cache server
|
||||
if (e.code() == error_code_actor_cancelled &&
|
||||
|
|
|
@ -137,6 +137,23 @@ TCServerInfo::TCServerInfo(StorageServerInterface ssi,
|
|||
}
|
||||
}
|
||||
|
||||
bool TCServerInfo::hasHealthyAvailableSpace(double minAvailableSpaceRatio) const {
	ASSERT(serverMetricsPresent());

	auto& metrics = getServerMetrics();
	ASSERT(metrics.available.bytes >= 0);
	ASSERT(metrics.capacity.bytes >= 0);

	double availableSpaceRatio;
	if (metrics.capacity.bytes == 0) {
		availableSpaceRatio = 0;
	} else {
		availableSpaceRatio = (((double)metrics.available.bytes) / metrics.capacity.bytes);
	}

	return availableSpaceRatio >= minAvailableSpaceRatio;
}
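// Illustrative example (numbers are not from the change itself): with 200 GB of capacity and 20 GB
// available, availableSpaceRatio is 0.1, so the server passes this check only when the requested
// minAvailableSpaceRatio is at most 0.1.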
|
||||
|
||||
Future<Void> TCServerInfo::updateServerMetrics() {
|
||||
return TCServerInfoImpl::updateServerMetrics(this);
|
||||
}
|
||||
|
@ -396,8 +413,23 @@ double TCTeamInfo::getMinAvailableSpaceRatio(bool includeInFlight) const {
|
|||
return minRatio;
|
||||
}
|
||||
|
||||
bool TCTeamInfo::allServersHaveHealthyAvailableSpace() const {
	bool result = true;
	double minAvailableSpaceRatio =
	    SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO + SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO_SAFETY_BUFFER;
	for (const auto& server : servers) {
		if (!server->serverMetricsPresent() || !server->hasHealthyAvailableSpace(minAvailableSpaceRatio)) {
			result = false;
			break;
		}
	}

	return result;
}
|
||||
|
||||
bool TCTeamInfo::hasHealthyAvailableSpace(double minRatio) const {
|
||||
return getMinAvailableSpaceRatio() >= minRatio && getMinAvailableSpace() > SERVER_KNOBS->MIN_AVAILABLE_SPACE;
|
||||
return getMinAvailableSpaceRatio() >= minRatio && getMinAvailableSpace() > SERVER_KNOBS->MIN_AVAILABLE_SPACE &&
|
||||
allServersHaveHealthyAvailableSpace();
|
||||
}
|
||||
|
||||
bool TCTeamInfo::isOptimal() const {
|
||||
|
|
|
@ -93,6 +93,8 @@ public:
|
|||
return (storeType == configStoreType || storeType == KeyValueStoreType::END);
|
||||
}
|
||||
|
||||
bool hasHealthyAvailableSpace(double minAvailableSpaceRatio) const;
|
||||
|
||||
Future<Void> updateServerMetrics();
|
||||
static Future<Void> updateServerMetrics(Reference<TCServerInfo> server);
|
||||
Future<Void> serverMetricsPolling();
|
||||
|
@ -220,4 +222,6 @@ private:
|
|||
// Calculate an "average" of the metrics replies that we received. Penalize teams from which we did not receive all
|
||||
// replies.
|
||||
int64_t getLoadAverage() const;
|
||||
|
||||
bool allServersHaveHealthyAvailableSpace() const;
|
||||
};
|
||||
|
|
|
@ -1740,140 +1740,168 @@ Future<Void> tLogPeekMessages(PromiseType replyPromise,
|
|||
return Void();
|
||||
}
|
||||
|
||||
state Version endVersion = logData->version.get() + 1;
|
||||
state bool onlySpilled = false;
|
||||
state Version endVersion;
|
||||
state bool onlySpilled;
|
||||
|
||||
// grab messages from disk
|
||||
//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
|
||||
if (reqBegin <= logData->persistentDataDurableVersion) {
|
||||
// Just in case the durable version changes while we are waiting for the read, we grab this data from memory. We
|
||||
// may or may not actually send it depending on whether we get enough data from disk. SOMEDAY: Only do this if
|
||||
// an initial attempt to read from disk results in insufficient data and the required data is no longer in
|
||||
// memory SOMEDAY: Should we only send part of the messages we collected, to actually limit the size of the
|
||||
// result?
|
||||
// Run the peek logic in a loop to account for the case where there is no data to return to the caller, and we may
|
||||
// want to wait a little bit instead of just sending back an empty message. This feature is controlled by a knob.
|
||||
loop {
|
||||
endVersion = logData->version.get() + 1;
|
||||
onlySpilled = false;
|
||||
|
||||
if (reqOnlySpilled) {
|
||||
endVersion = logData->persistentDataDurableVersion + 1;
|
||||
} else {
|
||||
peekMessagesFromMemory(logData, reqTag, reqBegin, messages2, endVersion);
|
||||
}
|
||||
// grab messages from disk
|
||||
//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
|
||||
if (reqBegin <= logData->persistentDataDurableVersion) {
|
||||
// Just in case the durable version changes while we are waiting for the read, we grab this data from
|
||||
// memory. We may or may not actually send it depending on whether we get enough data from disk. SOMEDAY:
|
||||
// Only do this if an initial attempt to read from disk results in insufficient data and the required data
|
||||
// is no longer in memory SOMEDAY: Should we only send part of the messages we collected, to actually limit
|
||||
// the size of the result?
|
||||
|
||||
if (logData->shouldSpillByValue(reqTag)) {
|
||||
RangeResult kvs = wait(self->persistentData->readRange(
|
||||
KeyRangeRef(persistTagMessagesKey(logData->logId, reqTag, reqBegin),
|
||||
persistTagMessagesKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
|
||||
SERVER_KNOBS->DESIRED_TOTAL_BYTES,
|
||||
SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
||||
|
||||
for (auto& kv : kvs) {
|
||||
auto ver = decodeTagMessagesKey(kv.key);
|
||||
messages << VERSION_HEADER << ver;
|
||||
messages.serializeBytes(kv.value);
|
||||
}
|
||||
|
||||
if (kvs.expectedSize() >= SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
|
||||
endVersion = decodeTagMessagesKey(kvs.end()[-1].key) + 1;
|
||||
onlySpilled = true;
|
||||
if (reqOnlySpilled) {
|
||||
endVersion = logData->persistentDataDurableVersion + 1;
|
||||
} else {
|
||||
messages.serializeBytes(messages2.toValue());
|
||||
peekMessagesFromMemory(logData, reqTag, reqBegin, messages2, endVersion);
|
||||
}
|
||||
} else {
|
||||
// FIXME: Limit to approximately DESIRED_TOTAL_BYTES somehow.
|
||||
RangeResult kvrefs = wait(self->persistentData->readRange(
|
||||
KeyRangeRef(
|
||||
persistTagMessageRefsKey(logData->logId, reqTag, reqBegin),
|
||||
persistTagMessageRefsKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
|
||||
SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK + 1));
|
||||
|
||||
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
|
||||
if (logData->shouldSpillByValue(reqTag)) {
|
||||
RangeResult kvs = wait(self->persistentData->readRange(
|
||||
KeyRangeRef(
|
||||
persistTagMessagesKey(logData->logId, reqTag, reqBegin),
|
||||
persistTagMessagesKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
|
||||
SERVER_KNOBS->DESIRED_TOTAL_BYTES,
|
||||
SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
||||
|
||||
state std::vector<std::pair<IDiskQueue::location, IDiskQueue::location>> commitLocations;
|
||||
state bool earlyEnd = false;
|
||||
uint32_t mutationBytes = 0;
|
||||
state uint64_t commitBytes = 0;
|
||||
state Version firstVersion = std::numeric_limits<Version>::max();
|
||||
for (int i = 0; i < kvrefs.size() && i < SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK; i++) {
|
||||
auto& kv = kvrefs[i];
|
||||
VectorRef<SpilledData> spilledData;
|
||||
BinaryReader r(kv.value, AssumeVersion(logData->protocolVersion));
|
||||
r >> spilledData;
|
||||
for (const SpilledData& sd : spilledData) {
|
||||
if (mutationBytes >= SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
|
||||
earlyEnd = true;
|
||||
for (auto& kv : kvs) {
|
||||
auto ver = decodeTagMessagesKey(kv.key);
|
||||
messages << VERSION_HEADER << ver;
|
||||
messages.serializeBytes(kv.value);
|
||||
}
|
||||
|
||||
if (kvs.expectedSize() >= SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
|
||||
endVersion = decodeTagMessagesKey(kvs.end()[-1].key) + 1;
|
||||
onlySpilled = true;
|
||||
} else {
|
||||
messages.serializeBytes(messages2.toValue());
|
||||
}
|
||||
} else {
|
||||
// FIXME: Limit to approximately DESIRED_TOTAL_BYTES somehow.
|
||||
RangeResult kvrefs = wait(self->persistentData->readRange(
|
||||
KeyRangeRef(
|
||||
persistTagMessageRefsKey(logData->logId, reqTag, reqBegin),
|
||||
persistTagMessageRefsKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
|
||||
SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK + 1));
|
||||
|
||||
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
|
||||
|
||||
state std::vector<std::pair<IDiskQueue::location, IDiskQueue::location>> commitLocations;
|
||||
state bool earlyEnd = false;
|
||||
uint32_t mutationBytes = 0;
|
||||
state uint64_t commitBytes = 0;
|
||||
state Version firstVersion = std::numeric_limits<Version>::max();
|
||||
for (int i = 0; i < kvrefs.size() && i < SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK; i++) {
|
||||
auto& kv = kvrefs[i];
|
||||
VectorRef<SpilledData> spilledData;
|
||||
BinaryReader r(kv.value, AssumeVersion(logData->protocolVersion));
|
||||
r >> spilledData;
|
||||
for (const SpilledData& sd : spilledData) {
|
||||
if (mutationBytes >= SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
|
||||
earlyEnd = true;
|
||||
break;
|
||||
}
|
||||
if (sd.version >= reqBegin) {
|
||||
firstVersion = std::min(firstVersion, sd.version);
|
||||
const IDiskQueue::location end = sd.start.lo + sd.length;
|
||||
commitLocations.emplace_back(sd.start, end);
|
||||
// This isn't perfect, because we aren't accounting for page boundaries, but should be
|
||||
// close enough.
|
||||
commitBytes += sd.length;
|
||||
mutationBytes += sd.mutationBytes;
|
||||
}
|
||||
}
|
||||
if (earlyEnd)
|
||||
break;
|
||||
}
|
||||
if (sd.version >= reqBegin) {
|
||||
firstVersion = std::min(firstVersion, sd.version);
|
||||
const IDiskQueue::location end = sd.start.lo + sd.length;
|
||||
commitLocations.emplace_back(sd.start, end);
|
||||
// This isn't perfect, because we aren't accounting for page boundaries, but should be
|
||||
// close enough.
|
||||
commitBytes += sd.length;
|
||||
mutationBytes += sd.mutationBytes;
|
||||
}
|
||||
}
|
||||
if (earlyEnd)
|
||||
break;
|
||||
}
|
||||
earlyEnd = earlyEnd || (kvrefs.size() >= SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK + 1);
|
||||
wait(self->peekMemoryLimiter.take(TaskPriority::TLogSpilledPeekReply, commitBytes));
|
||||
state FlowLock::Releaser memoryReservation(self->peekMemoryLimiter, commitBytes);
|
||||
state std::vector<Future<Standalone<StringRef>>> messageReads;
|
||||
messageReads.reserve(commitLocations.size());
|
||||
for (const auto& pair : commitLocations) {
|
||||
messageReads.push_back(self->rawPersistentQueue->read(pair.first, pair.second, CheckHashes::True));
|
||||
}
|
||||
commitLocations.clear();
|
||||
wait(waitForAll(messageReads));
|
||||
earlyEnd = earlyEnd || (kvrefs.size() >= SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK + 1);
|
||||
wait(self->peekMemoryLimiter.take(TaskPriority::TLogSpilledPeekReply, commitBytes));
|
||||
state FlowLock::Releaser memoryReservation(self->peekMemoryLimiter, commitBytes);
|
||||
state std::vector<Future<Standalone<StringRef>>> messageReads;
|
||||
messageReads.reserve(commitLocations.size());
|
||||
for (const auto& pair : commitLocations) {
|
||||
messageReads.push_back(self->rawPersistentQueue->read(pair.first, pair.second, CheckHashes::True));
|
||||
}
|
||||
commitLocations.clear();
|
||||
wait(waitForAll(messageReads));
|
||||
|
||||
state Version lastRefMessageVersion = 0;
|
||||
state int index = 0;
|
||||
loop {
|
||||
if (index >= messageReads.size())
|
||||
break;
|
||||
Standalone<StringRef> queueEntryData = messageReads[index].get();
|
||||
uint8_t valid;
|
||||
const uint32_t length = *(uint32_t*)queueEntryData.begin();
|
||||
queueEntryData = queueEntryData.substr(4, queueEntryData.size() - 4);
|
||||
BinaryReader rd(queueEntryData, IncludeVersion());
|
||||
state TLogQueueEntry entry;
|
||||
rd >> entry >> valid;
|
||||
ASSERT(valid == 0x01);
|
||||
ASSERT(length + sizeof(valid) == queueEntryData.size());
|
||||
state Version lastRefMessageVersion = 0;
|
||||
state int index = 0;
|
||||
loop {
|
||||
if (index >= messageReads.size())
|
||||
break;
|
||||
Standalone<StringRef> queueEntryData = messageReads[index].get();
|
||||
uint8_t valid;
|
||||
const uint32_t length = *(uint32_t*)queueEntryData.begin();
|
||||
queueEntryData = queueEntryData.substr(4, queueEntryData.size() - 4);
|
||||
BinaryReader rd(queueEntryData, IncludeVersion());
|
||||
state TLogQueueEntry entry;
|
||||
rd >> entry >> valid;
|
||||
ASSERT(valid == 0x01);
|
||||
ASSERT(length + sizeof(valid) == queueEntryData.size());
|
||||
|
||||
messages << VERSION_HEADER << entry.version;
|
||||
messages << VERSION_HEADER << entry.version;
|
||||
|
||||
std::vector<StringRef> rawMessages =
|
||||
wait(parseMessagesForTag(entry.messages, reqTag, logData->logRouterTags));
|
||||
for (const StringRef& msg : rawMessages) {
|
||||
messages.serializeBytes(msg);
|
||||
DEBUG_TAGS_AND_MESSAGE("TLogPeekFromDisk", entry.version, msg, logData->logId)
|
||||
.detail("DebugID", self->dbgid)
|
||||
.detail("PeekTag", reqTag);
|
||||
std::vector<StringRef> rawMessages =
|
||||
wait(parseMessagesForTag(entry.messages, reqTag, logData->logRouterTags));
|
||||
for (const StringRef& msg : rawMessages) {
|
||||
messages.serializeBytes(msg);
|
||||
DEBUG_TAGS_AND_MESSAGE("TLogPeekFromDisk", entry.version, msg, logData->logId)
|
||||
.detail("DebugID", self->dbgid)
|
||||
.detail("PeekTag", reqTag);
|
||||
}
|
||||
|
||||
lastRefMessageVersion = entry.version;
|
||||
index++;
|
||||
}
|
||||
|
||||
lastRefMessageVersion = entry.version;
|
||||
index++;
|
||||
}
|
||||
messageReads.clear();
|
||||
memoryReservation.release();
|
||||
|
||||
messageReads.clear();
|
||||
memoryReservation.release();
|
||||
|
||||
if (earlyEnd) {
|
||||
endVersion = lastRefMessageVersion + 1;
|
||||
onlySpilled = true;
|
||||
} else {
|
||||
messages.serializeBytes(messages2.toValue());
|
||||
if (earlyEnd) {
|
||||
endVersion = lastRefMessageVersion + 1;
|
||||
onlySpilled = true;
|
||||
} else {
|
||||
messages.serializeBytes(messages2.toValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (reqOnlySpilled) {
|
||||
endVersion = logData->persistentDataDurableVersion + 1;
|
||||
} else {
|
||||
peekMessagesFromMemory(logData, reqTag, reqBegin, messages, endVersion);
|
||||
if (reqOnlySpilled) {
|
||||
endVersion = logData->persistentDataDurableVersion + 1;
|
||||
} else {
|
||||
peekMessagesFromMemory(logData, reqTag, reqBegin, messages, endVersion);
|
||||
}
|
||||
|
||||
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
|
||||
}
|
||||
|
||||
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
|
||||
// Reply to the peek request when
// - there is data to return to the caller, or
// - batching of empty peeks is disabled, or
// - the empty-peek batching interval has been reached.
if (messages.getLength() > 0 || !SERVER_KNOBS->PEEK_BATCHING_EMPTY_MSG ||
    (now() - blockStart > SERVER_KNOBS->PEEK_BATCHING_EMPTY_MSG_INTERVAL)) {
	break;
}

state Version waitUntilVersion = logData->version.get() + 1;

// Currently, everything from `reqBegin` to logData->version is an empty peek. Wait for more versions, or until
// the empty-peek batching interval has expired.
wait(logData->version.whenAtLeast(waitUntilVersion) ||
     delay(SERVER_KNOBS->PEEK_BATCHING_EMPTY_MSG_INTERVAL - (now() - blockStart)));
if (logData->version.get() < waitUntilVersion) {
	break; // We know that everything from `reqBegin` to logData->version is an empty message. Skip re-executing
	       // the peek logic.
}
}
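// Same empty-peek batching as in the log router: an otherwise-empty reply is delayed by up to
// PEEK_BATCHING_EMPTY_MSG_INTERVAL so that it can be filled if new versions arrive in the meantime.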
|
||||
|
||||
TLogPeekReply reply;
|
||||
|
@ -1969,8 +1997,8 @@ ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Ref
|
|||
} catch (Error& e) {
|
||||
self->activePeekStreams--;
|
||||
TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
|
||||
.error(e, true);
|
||||
.errorUnsuppressed(e)
|
||||
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress());
|
||||
|
||||
if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
|
||||
req.reply.sendError(e);
|
||||
|
@ -2441,7 +2469,7 @@ ACTOR Future<Void> tLogSnapCreate(TLogSnapRequest snapReq, TLogData* self, Refer
|
|||
}
|
||||
snapReq.reply.send(Void());
|
||||
} catch (Error& e) {
|
||||
TraceEvent("TLogExecHelperError").error(e, true /*includeCancelled */);
|
||||
TraceEvent("TLogExecHelperError").errorUnsuppressed(e);
|
||||
if (e.code() != error_code_operation_cancelled) {
|
||||
snapReq.reply.sendError(e);
|
||||
} else {
|
||||
|
@ -3158,7 +3186,7 @@ bool tlogTerminated(TLogData* self, IKeyValueStore* persistentData, TLogQueue* p
|
|||
|
||||
if (e.code() == error_code_worker_removed || e.code() == error_code_recruitment_failed ||
|
||||
e.code() == error_code_file_not_found || e.code() == error_code_invalid_cluster_id) {
|
||||
TraceEvent("TLogTerminated", self->dbgid).error(e, true);
|
||||
TraceEvent("TLogTerminated", self->dbgid).errorUnsuppressed(e);
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
|
@ -3509,7 +3537,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
|
|||
}
|
||||
} catch (Error& e) {
|
||||
self.terminated.send(Void());
|
||||
TraceEvent("TLogError", tlogId).error(e, true);
|
||||
TraceEvent("TLogError", tlogId).errorUnsuppressed(e);
|
||||
if (recovered.canBeSet())
|
||||
recovered.send(Void());
|
||||
|
||||
|
|
|
@ -0,0 +1,598 @@
|
|||
/*
|
||||
* TagThrottler.h
|
||||
*
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbserver/TagThrottler.h"

class RkTagThrottleCollection : NonCopyable {
    struct RkTagData {
        Smoother requestRate;
        RkTagData() : requestRate(CLIENT_KNOBS->TAG_THROTTLE_SMOOTHING_WINDOW) {}
    };

    struct RkTagThrottleData {
        ClientTagThrottleLimits limits;
        Smoother clientRate;

        // Only used by auto-throttles
        double created = now();
        double lastUpdated = 0;
        double lastReduced = now();
        bool rateSet = false;

        RkTagThrottleData() : clientRate(CLIENT_KNOBS->TAG_THROTTLE_SMOOTHING_WINDOW) {}

        double getTargetRate(Optional<double> requestRate) {
            if (limits.tpsRate == 0.0 || !requestRate.present() || requestRate.get() == 0.0 || !rateSet) {
                return limits.tpsRate;
            } else {
                return std::min(limits.tpsRate, (limits.tpsRate / requestRate.get()) * clientRate.smoothTotal());
            }
        }
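        // Spelled out (numbers are illustrative, not from this file): with limits.tpsRate = 100, an observed
        // requestRate = 200, and clientRate.smoothTotal() = 150, the result is min(100, (100 / 200) * 150) = 75.
        // The target rate is scaled by clientRate / requestRate and never exceeds limits.tpsRate itself.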

        Optional<double> updateAndGetClientRate(Optional<double> requestRate) {
            if (limits.expiration > now()) {
                double targetRate = getTargetRate(requestRate);
                if (targetRate == std::numeric_limits<double>::max()) {
                    rateSet = false;
                    return targetRate;
                }
                if (!rateSet) {
                    rateSet = true;
                    clientRate.reset(targetRate);
                } else {
                    clientRate.setTotal(targetRate);
                }

                double rate = clientRate.smoothTotal();
                ASSERT(rate >= 0);
                return rate;
            } else {
                TEST(true); // Get throttle rate for expired throttle
                rateSet = false;
                return Optional<double>();
            }
        }
    };

    void initializeTag(TransactionTag const& tag) { tagData.try_emplace(tag); }

public:
    RkTagThrottleCollection() {}

    RkTagThrottleCollection(RkTagThrottleCollection&& other) {
        autoThrottledTags = std::move(other.autoThrottledTags);
        manualThrottledTags = std::move(other.manualThrottledTags);
        tagData = std::move(other.tagData);
    }

    void operator=(RkTagThrottleCollection&& other) {
        autoThrottledTags = std::move(other.autoThrottledTags);
        manualThrottledTags = std::move(other.manualThrottledTags);
        tagData = std::move(other.tagData);
    }

    double computeTargetTpsRate(double currentBusyness, double targetBusyness, double requestRate) {
        ASSERT(currentBusyness > 0);

        if (targetBusyness < 1) {
            double targetFraction = targetBusyness * (1 - currentBusyness) / ((1 - targetBusyness) * currentBusyness);
            return requestRate * targetFraction;
        } else {
            return std::numeric_limits<double>::max();
        }
    }
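    // The formula above scales requestRate by the ratio of busyness odds:
    // targetFraction = [targetBusyness / (1 - targetBusyness)] / [currentBusyness / (1 - currentBusyness)].
    // Worked example (illustrative numbers): currentBusyness = 0.5, targetBusyness = 0.25, requestRate = 1000
    // gives targetFraction = (0.25 * 0.5) / (0.75 * 0.5) = 1/3, i.e. a target of roughly 333 TPS. A targetBusyness
    // of 1 or more yields an unlimited rate, hence the std::numeric_limits<double>::max() branch.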

    // Returns the TPS rate if the throttle is updated, otherwise returns an empty optional
    Optional<double> autoThrottleTag(UID id,
                                     TransactionTag const& tag,
                                     double fractionalBusyness,
                                     Optional<double> tpsRate = Optional<double>(),
                                     Optional<double> expiration = Optional<double>()) {
        ASSERT(!tpsRate.present() || tpsRate.get() >= 0);
        ASSERT(!expiration.present() || expiration.get() > now());

        auto itr = autoThrottledTags.find(tag);
        bool present = (itr != autoThrottledTags.end());
        if (!present) {
            if (autoThrottledTags.size() >= SERVER_KNOBS->MAX_AUTO_THROTTLED_TRANSACTION_TAGS) {
                TEST(true); // Reached auto-throttle limit
                return Optional<double>();
            }

            itr = autoThrottledTags.try_emplace(tag).first;
            initializeTag(tag);
        } else if (itr->second.limits.expiration <= now()) {
            TEST(true); // Re-throttling expired tag that hasn't been cleaned up
            present = false;
            itr->second = RkTagThrottleData();
        }

        auto& throttle = itr->second;

        if (!tpsRate.present()) {
            if (now() <= throttle.created + SERVER_KNOBS->AUTO_TAG_THROTTLE_START_AGGREGATION_TIME) {
                tpsRate = std::numeric_limits<double>::max();
                if (present) {
                    return Optional<double>();
                }
            } else if (now() <= throttle.lastUpdated + SERVER_KNOBS->AUTO_TAG_THROTTLE_UPDATE_FREQUENCY) {
                TEST(true); // Tag auto-throttled too quickly
                return Optional<double>();
            } else {
                tpsRate = computeTargetTpsRate(fractionalBusyness,
                                               SERVER_KNOBS->AUTO_THROTTLE_TARGET_TAG_BUSYNESS,
                                               tagData[tag].requestRate.smoothRate());

                if (throttle.limits.expiration > now() && tpsRate.get() >= throttle.limits.tpsRate) {
                    TEST(true); // Tag auto-throttle rate increase attempt while active
                    return Optional<double>();
                }

                throttle.lastUpdated = now();
                if (tpsRate.get() < throttle.limits.tpsRate) {
                    throttle.lastReduced = now();
                }
            }
        }
        if (!expiration.present()) {
            expiration = now() + SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION;
        }

        ASSERT(tpsRate.present() && tpsRate.get() >= 0);

        throttle.limits.tpsRate = tpsRate.get();
        throttle.limits.expiration = expiration.get();

        Optional<double> clientRate = throttle.updateAndGetClientRate(getRequestRate(tag));

        TraceEvent("RkSetAutoThrottle", id)
            .detail("Tag", tag)
            .detail("TargetRate", tpsRate.get())
            .detail("Expiration", expiration.get() - now())
            .detail("ClientRate", clientRate)
            .detail("Created", now() - throttle.created)
            .detail("LastUpdate", now() - throttle.lastUpdated)
            .detail("LastReduced", now() - throttle.lastReduced);

        if (tpsRate.get() != std::numeric_limits<double>::max()) {
            return tpsRate.get();
        } else {
            return Optional<double>();
        }
    }
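    // autoThrottleTag, in summary: a new or expired entry is (re)created in autoThrottledTags, capped at
    // MAX_AUTO_THROTTLED_TRANSACTION_TAGS. If no tpsRate is supplied, one is derived from fractionalBusyness via
    // computeTargetTpsRate, but only after AUTO_TAG_THROTTLE_START_AGGREGATION_TIME has passed since creation, at
    // most once per AUTO_TAG_THROTTLE_UPDATE_FREQUENCY, and never in a way that loosens an unexpired throttle.
    // The default expiration is now() + AUTO_TAG_THROTTLE_DURATION, and a rate is returned only when a finite
    // rate was actually applied.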

    void manualThrottleTag(UID id,
                           TransactionTag const& tag,
                           TransactionPriority priority,
                           double tpsRate,
                           double expiration,
                           Optional<ClientTagThrottleLimits> const& oldLimits) {
        ASSERT(tpsRate >= 0);
        ASSERT(expiration > now());

        auto& priorityThrottleMap = manualThrottledTags[tag];
        auto result = priorityThrottleMap.try_emplace(priority);
        initializeTag(tag);
        ASSERT(result.second); // Updates to the map are done by copying the whole map

        result.first->second.limits.tpsRate = tpsRate;
        result.first->second.limits.expiration = expiration;

        if (!oldLimits.present()) {
            TEST(true); // Transaction tag manually throttled
            TraceEvent("RatekeeperAddingManualThrottle", id)
                .detail("Tag", tag)
                .detail("Rate", tpsRate)
                .detail("Priority", transactionPriorityToString(priority))
                .detail("SecondsToExpiration", expiration - now());
        } else if (oldLimits.get().tpsRate != tpsRate || oldLimits.get().expiration != expiration) {
            TEST(true); // Manual transaction tag throttle updated
            TraceEvent("RatekeeperUpdatingManualThrottle", id)
                .detail("Tag", tag)
                .detail("Rate", tpsRate)
                .detail("Priority", transactionPriorityToString(priority))
                .detail("SecondsToExpiration", expiration - now());
        }

        Optional<double> clientRate = result.first->second.updateAndGetClientRate(getRequestRate(tag));
        ASSERT(clientRate.present());
    }

    Optional<ClientTagThrottleLimits> getManualTagThrottleLimits(TransactionTag const& tag,
                                                                 TransactionPriority priority) {
        auto itr = manualThrottledTags.find(tag);
        if (itr != manualThrottledTags.end()) {
            auto priorityItr = itr->second.find(priority);
            if (priorityItr != itr->second.end()) {
                return priorityItr->second.limits;
            }
        }

        return Optional<ClientTagThrottleLimits>();
    }

    PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates(bool autoThrottlingEnabled) {
        PrioritizedTransactionTagMap<ClientTagThrottleLimits> clientRates;

        for (auto tagItr = tagData.begin(); tagItr != tagData.end();) {
            bool tagPresent = false;

            double requestRate = tagItr->second.requestRate.smoothRate();
            auto manualItr = manualThrottledTags.find(tagItr->first);
            if (manualItr != manualThrottledTags.end()) {
                Optional<ClientTagThrottleLimits> manualClientRate;
                for (auto priority = allTransactionPriorities.rbegin(); !(priority == allTransactionPriorities.rend());
                     ++priority) {
                    auto priorityItr = manualItr->second.find(*priority);
                    if (priorityItr != manualItr->second.end()) {
                        Optional<double> priorityClientRate = priorityItr->second.updateAndGetClientRate(requestRate);
                        if (!priorityClientRate.present()) {
                            TEST(true); // Manual priority throttle expired
                            priorityItr = manualItr->second.erase(priorityItr);
                        } else {
                            if (!manualClientRate.present() ||
                                manualClientRate.get().tpsRate > priorityClientRate.get()) {
                                manualClientRate = ClientTagThrottleLimits(priorityClientRate.get(),
                                                                           priorityItr->second.limits.expiration);
                            } else {
                                TEST(true); // Manual throttle overridden by higher priority
                            }

                            ++priorityItr;
                        }
                    }

                    if (manualClientRate.present()) {
                        tagPresent = true;
                        TEST(true); // Using manual throttle
                        clientRates[*priority][tagItr->first] = manualClientRate.get();
                    }
                }

                if (manualItr->second.empty()) {
                    TEST(true); // All manual throttles expired
                    manualThrottledTags.erase(manualItr);
                    break;
                }
            }

            auto autoItr = autoThrottledTags.find(tagItr->first);
            if (autoItr != autoThrottledTags.end()) {
                Optional<double> autoClientRate = autoItr->second.updateAndGetClientRate(requestRate);
                if (autoClientRate.present()) {
                    double adjustedRate = autoClientRate.get();
                    double rampStartTime = autoItr->second.lastReduced + SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION -
                                           SERVER_KNOBS->AUTO_TAG_THROTTLE_RAMP_UP_TIME;
                    if (now() >= rampStartTime && adjustedRate != std::numeric_limits<double>::max()) {
                        TEST(true); // Tag auto-throttle ramping up

                        double targetBusyness = SERVER_KNOBS->AUTO_THROTTLE_TARGET_TAG_BUSYNESS;
                        if (targetBusyness == 0) {
                            targetBusyness = 0.01;
                        }

                        double rampLocation = (now() - rampStartTime) / SERVER_KNOBS->AUTO_TAG_THROTTLE_RAMP_UP_TIME;
                        adjustedRate =
                            computeTargetTpsRate(targetBusyness, pow(targetBusyness, 1 - rampLocation), adjustedRate);
                    }

                    tagPresent = true;
                    if (autoThrottlingEnabled) {
                        auto result = clientRates[TransactionPriority::DEFAULT].try_emplace(
                            tagItr->first, adjustedRate, autoItr->second.limits.expiration);
                        if (!result.second && result.first->second.tpsRate > adjustedRate) {
                            result.first->second =
                                ClientTagThrottleLimits(adjustedRate, autoItr->second.limits.expiration);
                        } else {
                            TEST(true); // Auto throttle overridden by manual throttle
                        }
                        clientRates[TransactionPriority::BATCH][tagItr->first] =
                            ClientTagThrottleLimits(0, autoItr->second.limits.expiration);
                    }
                } else {
                    ASSERT(autoItr->second.limits.expiration <= now());
                    TEST(true); // Auto throttle expired
                    if (BUGGIFY) { // Temporarily extend the window between expiration and cleanup
                        tagPresent = true;
                    } else {
                        autoThrottledTags.erase(autoItr);
                    }
                }
            }

            if (!tagPresent) {
                TEST(true); // All tag throttles expired
                tagItr = tagData.erase(tagItr);
            } else {
                ++tagItr;
            }
        }

        return clientRates;
    }
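    // getClientRates, in summary: for each tracked tag, the lowest-rate manual throttle seen while walking the
    // priority list is applied at each priority it covers; an unexpired auto throttle (when auto throttling is
    // enabled) contributes a DEFAULT-priority limit, ramped back up over AUTO_TAG_THROTTLE_RAMP_UP_TIME as it
    // approaches expiration, plus a zero-rate BATCH-priority limit; expired throttles and tags with nothing left
    // throttling them are removed along the way.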

    void addRequests(TransactionTag const& tag, int requests) {
        if (requests > 0) {
            TEST(true); // Requests reported for throttled tag

            auto tagItr = tagData.try_emplace(tag);
            tagItr.first->second.requestRate.addDelta(requests);

            double requestRate = tagItr.first->second.requestRate.smoothRate();

            auto autoItr = autoThrottledTags.find(tag);
            if (autoItr != autoThrottledTags.end()) {
                autoItr->second.updateAndGetClientRate(requestRate);
            }

            auto manualItr = manualThrottledTags.find(tag);
            if (manualItr != manualThrottledTags.end()) {
                for (auto priorityItr = manualItr->second.begin(); priorityItr != manualItr->second.end();
                     ++priorityItr) {
                    priorityItr->second.updateAndGetClientRate(requestRate);
                }
            }
        }
    }

    Optional<double> getRequestRate(TransactionTag const& tag) {
        auto itr = tagData.find(tag);
        if (itr != tagData.end()) {
            return itr->second.requestRate.smoothRate();
        }
        return Optional<double>();
    }

    int64_t autoThrottleCount() const { return autoThrottledTags.size(); }

    int64_t manualThrottleCount() const {
        int64_t count = 0;
        for (auto itr = manualThrottledTags.begin(); itr != manualThrottledTags.end(); ++itr) {
            count += itr->second.size();
        }

        return count;
    }

    TransactionTagMap<RkTagThrottleData> autoThrottledTags;
    TransactionTagMap<std::map<TransactionPriority, RkTagThrottleData>> manualThrottledTags;
    TransactionTagMap<RkTagData> tagData;
    uint32_t busyReadTagCount = 0, busyWriteTagCount = 0;
};

class TagThrottlerImpl {
    Database db;
    UID id;
    RkTagThrottleCollection throttledTags;
    uint64_t throttledTagChangeId{ 0 };
    bool autoThrottlingEnabled{ false };

    ACTOR static Future<Void> monitorThrottlingChanges(TagThrottlerImpl* self) {
        state bool committed = false;
        loop {
            state ReadYourWritesTransaction tr(self->db);

            loop {
                try {
                    tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
                    tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

                    state Future<RangeResult> throttledTagKeys = tr.getRange(tagThrottleKeys, CLIENT_KNOBS->TOO_MANY);
                    state Future<Optional<Value>> autoThrottlingEnabled = tr.get(tagThrottleAutoEnabledKey);

                    if (!committed) {
                        BinaryWriter limitWriter(Unversioned());
                        limitWriter << SERVER_KNOBS->MAX_MANUAL_THROTTLED_TRANSACTION_TAGS;
                        tr.set(tagThrottleLimitKey, limitWriter.toValue());
                    }

                    wait(success(throttledTagKeys) && success(autoThrottlingEnabled));

                    if (autoThrottlingEnabled.get().present() &&
                        autoThrottlingEnabled.get().get() == LiteralStringRef("0")) {
                        TEST(true); // Auto-throttling disabled
                        if (self->autoThrottlingEnabled) {
                            TraceEvent("AutoTagThrottlingDisabled", self->id).log();
                        }
                        self->autoThrottlingEnabled = false;
                    } else if (autoThrottlingEnabled.get().present() &&
                               autoThrottlingEnabled.get().get() == LiteralStringRef("1")) {
                        TEST(true); // Auto-throttling enabled
                        if (!self->autoThrottlingEnabled) {
                            TraceEvent("AutoTagThrottlingEnabled", self->id).log();
                        }
                        self->autoThrottlingEnabled = true;
                    } else {
                        TEST(true); // Auto-throttling unspecified
                        if (autoThrottlingEnabled.get().present()) {
                            TraceEvent(SevWarnAlways, "InvalidAutoTagThrottlingValue", self->id)
                                .detail("Value", autoThrottlingEnabled.get().get());
                        }
                        self->autoThrottlingEnabled = SERVER_KNOBS->AUTO_TAG_THROTTLING_ENABLED;
                        if (!committed)
                            tr.set(tagThrottleAutoEnabledKey,
                                   LiteralStringRef(self->autoThrottlingEnabled ? "1" : "0"));
                    }

                    RkTagThrottleCollection updatedTagThrottles;

                    TraceEvent("RatekeeperReadThrottledTags", self->id)
                        .detail("NumThrottledTags", throttledTagKeys.get().size());
                    for (auto entry : throttledTagKeys.get()) {
                        TagThrottleKey tagKey = TagThrottleKey::fromKey(entry.key);
                        TagThrottleValue tagValue = TagThrottleValue::fromValue(entry.value);

                        ASSERT(tagKey.tags.size() == 1); // Currently, only 1 tag per throttle is supported

                        if (tagValue.expirationTime == 0 ||
                            tagValue.expirationTime > now() + tagValue.initialDuration) {
                            TEST(true); // Converting tag throttle duration to absolute time
                            tagValue.expirationTime = now() + tagValue.initialDuration;
                            BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
                            wr << tagValue;
                            state Value value = wr.toValue();

                            tr.set(entry.key, value);
                        }

                        if (tagValue.expirationTime > now()) {
                            TransactionTag tag = *tagKey.tags.begin();
                            Optional<ClientTagThrottleLimits> oldLimits =
                                self->throttledTags.getManualTagThrottleLimits(tag, tagKey.priority);

                            if (tagKey.throttleType == TagThrottleType::AUTO) {
                                updatedTagThrottles.autoThrottleTag(
                                    self->id, tag, 0, tagValue.tpsRate, tagValue.expirationTime);
                                if (tagValue.reason == TagThrottledReason::BUSY_READ) {
                                    updatedTagThrottles.busyReadTagCount++;
                                } else if (tagValue.reason == TagThrottledReason::BUSY_WRITE) {
                                    updatedTagThrottles.busyWriteTagCount++;
                                }
                            } else {
                                updatedTagThrottles.manualThrottleTag(self->id,
                                                                      tag,
                                                                      tagKey.priority,
                                                                      tagValue.tpsRate,
                                                                      tagValue.expirationTime,
                                                                      oldLimits);
                            }
                        }
                    }

                    self->throttledTags = std::move(updatedTagThrottles);
                    ++self->throttledTagChangeId;

                    state Future<Void> watchFuture = tr.watch(tagThrottleSignalKey);
                    wait(tr.commit());
                    committed = true;

                    wait(watchFuture);
                    TraceEvent("RatekeeperThrottleSignaled", self->id).log();
                    TEST(true); // Tag throttle changes detected
                    break;
                } catch (Error& e) {
                    TraceEvent("RatekeeperMonitorThrottlingChangesError", self->id).error(e);
                    wait(tr.onError(e));
                }
            }
        }
    }

    Optional<double> autoThrottleTag(UID id, TransactionTag tag, double busyness) {
        return throttledTags.autoThrottleTag(id, tag, busyness);
    }

    Future<Void> tryAutoThrottleTag(TransactionTag tag, double rate, double busyness, TagThrottledReason reason) {
        // NOTE: before the comparison with MIN_TAG_COST, the busiest tag rate is also compared against
        // MIN_TAG_PAGES_RATE; currently MIN_TAG_PAGES_RATE > MIN_TAG_COST in our default knobs.
        if (busyness > SERVER_KNOBS->AUTO_THROTTLE_TARGET_TAG_BUSYNESS && rate > SERVER_KNOBS->MIN_TAG_COST) {
            TEST(true); // Transaction tag auto-throttled
            Optional<double> clientRate = autoThrottleTag(id, tag, busyness);
            if (clientRate.present()) {
                TagSet tags;
                tags.addTag(tag);

                Reference<DatabaseContext> dbRef = Reference<DatabaseContext>::addRef(db.getPtr());
                return ThrottleApi::throttleTags(dbRef,
                                                 tags,
                                                 clientRate.get(),
                                                 SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION,
                                                 TagThrottleType::AUTO,
                                                 TransactionPriority::DEFAULT,
                                                 now() + SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION,
                                                 reason);
            }
        }
        return Void();
    }

public:
    TagThrottlerImpl(Database db, UID id) : db(db), id(id) {}
    Future<Void> monitorThrottlingChanges() { return monitorThrottlingChanges(this); }

    void addRequests(TransactionTag tag, int count) { throttledTags.addRequests(tag, count); }
    uint64_t getThrottledTagChangeId() const { return throttledTagChangeId; }
    PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates() {
        return throttledTags.getClientRates(autoThrottlingEnabled);
    }
    int64_t autoThrottleCount() const { return throttledTags.autoThrottleCount(); }
    uint32_t busyReadTagCount() const { return throttledTags.busyReadTagCount; }
    uint32_t busyWriteTagCount() const { return throttledTags.busyWriteTagCount; }
    int64_t manualThrottleCount() const { return throttledTags.manualThrottleCount(); }
    bool isAutoThrottlingEnabled() const { return autoThrottlingEnabled; }

    Future<Void> tryAutoThrottleTag(StorageQueueInfo& ss, int64_t storageQueue, int64_t storageDurabilityLag) {
        // NOTE: we keep it simple and don't differentiate write saturation and read saturation at the moment.
        // In most situations this works. More indicators besides queue size and durability lag could be
        // investigated in the future.
        if (storageQueue > SERVER_KNOBS->AUTO_TAG_THROTTLE_STORAGE_QUEUE_BYTES ||
            storageDurabilityLag > SERVER_KNOBS->AUTO_TAG_THROTTLE_DURABILITY_LAG_VERSIONS) {
            if (ss.busiestWriteTag.present()) {
                return tryAutoThrottleTag(ss.busiestWriteTag.get(),
                                          ss.busiestWriteTagRate,
                                          ss.busiestWriteTagFractionalBusyness,
                                          TagThrottledReason::BUSY_WRITE);
            }
            if (ss.busiestReadTag.present()) {
                return tryAutoThrottleTag(ss.busiestReadTag.get(),
                                          ss.busiestReadTagRate,
                                          ss.busiestReadTagFractionalBusyness,
                                          TagThrottledReason::BUSY_READ);
            }
        }
        return Void();
    }

}; // class TagThrottlerImpl

TagThrottler::TagThrottler(Database db, UID id) : impl(PImpl<TagThrottlerImpl>::create(db, id)) {}
TagThrottler::~TagThrottler() = default;
Future<Void> TagThrottler::monitorThrottlingChanges() {
    return impl->monitorThrottlingChanges();
}
void TagThrottler::addRequests(TransactionTag tag, int count) {
    impl->addRequests(tag, count);
}
uint64_t TagThrottler::getThrottledTagChangeId() const {
    return impl->getThrottledTagChangeId();
}
PrioritizedTransactionTagMap<ClientTagThrottleLimits> TagThrottler::getClientRates() {
    return impl->getClientRates();
}
int64_t TagThrottler::autoThrottleCount() const {
    return impl->autoThrottleCount();
}
uint32_t TagThrottler::busyReadTagCount() const {
    return impl->busyReadTagCount();
}
uint32_t TagThrottler::busyWriteTagCount() const {
    return impl->busyWriteTagCount();
}
int64_t TagThrottler::manualThrottleCount() const {
    return impl->manualThrottleCount();
}
bool TagThrottler::isAutoThrottlingEnabled() const {
    return impl->isAutoThrottlingEnabled();
}
Future<Void> TagThrottler::tryAutoThrottleTag(StorageQueueInfo& ss,
                                              int64_t storageQueue,
                                              int64_t storageDurabilityLag) {
    return impl->tryAutoThrottleTag(ss, storageQueue, storageDurabilityLag);
}
@ -0,0 +1,42 @@
/*
 * TagThrottler.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "fdbclient/PImpl.h"
#include "fdbserver/Ratekeeper.h"

class TagThrottler {
    PImpl<class TagThrottlerImpl> impl;

public:
    TagThrottler(Database db, UID id);
    ~TagThrottler();
    Future<Void> monitorThrottlingChanges();
    void addRequests(TransactionTag tag, int count);
    uint64_t getThrottledTagChangeId() const;
    PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates();
    int64_t autoThrottleCount() const;
    uint32_t busyReadTagCount() const;
    uint32_t busyWriteTagCount() const;
    int64_t manualThrottleCount() const;
    bool isAutoThrottlingEnabled() const;
    Future<Void> tryAutoThrottleTag(StorageQueueInfo&, int64_t storageQueue, int64_t storageDurabilityLag);
};
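// Intended usage, inferred from TagThrottlerImpl in the accompanying source diff above rather than stated in this
// header: Ratekeeper owns one TagThrottler, keeps monitorThrottlingChanges() running so that changes to the tag
// throttle keys in the system keyspace are picked up via a watch, feeds observed per-tag traffic through
// addRequests(), calls tryAutoThrottleTag() for saturated storage servers, and republishes getClientRates()
// whenever getThrottledTagChangeId() advances.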