Merge branch 'main' of github.com:apple/foundationdb into tenant-list-filter
This commit is contained in:
commit
886c286297
|
@ -59,6 +59,8 @@
|
|||
#include "shm.hpp"
|
||||
#include "stats.hpp"
|
||||
#include "time.hpp"
|
||||
#include "rapidjson/document.h"
|
||||
#include "rapidjson/error/en.h"
|
||||
|
||||
namespace mako {
|
||||
|
||||
|
@ -88,14 +90,29 @@ Transaction createNewTransaction(Database db, Arguments const& args, int id = -1
|
|||
}
|
||||
// Create Tenant Transaction
|
||||
int tenant_id = (id == -1) ? urand(0, args.active_tenants - 1) : id;
|
||||
Transaction tr;
|
||||
std::string tenantStr;
|
||||
// If provided tenants array, use it
|
||||
if (tenants) {
|
||||
return tenants[tenant_id].createTransaction();
|
||||
tr = tenants[tenant_id].createTransaction();
|
||||
} else {
|
||||
tenantStr = "tenant" + std::to_string(tenant_id);
|
||||
BytesRef tenant_name = toBytesRef(tenantStr);
|
||||
Tenant t = db.openTenant(tenant_name);
|
||||
tr = t.createTransaction();
|
||||
}
|
||||
std::string tenantStr = "tenant" + std::to_string(tenant_id);
|
||||
BytesRef tenant_name = toBytesRef(tenantStr);
|
||||
Tenant t = db.openTenant(tenant_name);
|
||||
return t.createTransaction();
|
||||
if (!args.authorization_tokens.empty()) {
|
||||
// lookup token based on tenant name and, if found, set authz token to transaction
|
||||
if (tenantStr.empty())
|
||||
tenantStr = "tenant" + std::to_string(tenant_id);
|
||||
auto tokenMapItr = args.authorization_tokens.find(tenantStr);
|
||||
if (tokenMapItr != args.authorization_tokens.end()) {
|
||||
tr.setOption(FDB_TR_OPTION_AUTHORIZATION_TOKEN, tokenMapItr->second);
|
||||
} else {
|
||||
logr.warn("Authorization token map is not empty, but could not find token for tenant '{}'", tenantStr);
|
||||
}
|
||||
}
|
||||
return tr;
|
||||
}
|
||||
|
||||
uint64_t byteswapHelper(uint64_t input) {
|
||||
|
@ -815,6 +832,18 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces
|
|||
logr.error("network::setOption(FDB_NET_OPTION_DISTRIBUTED_CLIENT_TRACER): {}", err.what());
|
||||
}
|
||||
|
||||
if (args.tls_certificate_file.has_value()) {
|
||||
network::setOption(FDB_NET_OPTION_TLS_CERT_PATH, args.tls_certificate_file.value());
|
||||
}
|
||||
|
||||
if (args.tls_key_file.has_value()) {
|
||||
network::setOption(FDB_NET_OPTION_TLS_KEY_PATH, args.tls_key_file.value());
|
||||
}
|
||||
|
||||
if (args.tls_ca_file.has_value()) {
|
||||
network::setOption(FDB_NET_OPTION_TLS_CA_PATH, args.tls_ca_file.value());
|
||||
}
|
||||
|
||||
/* enable flatbuffers if specified */
|
||||
if (args.flatbuffers) {
|
||||
#ifdef FDB_NET_OPTION_USE_FLATBUFFERS
|
||||
|
@ -982,57 +1011,55 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces
|
|||
}
|
||||
|
||||
/* initialize the parameters with default values */
|
||||
int initArguments(Arguments& args) {
|
||||
memset(&args, 0, sizeof(Arguments)); /* zero-out everything */
|
||||
args.num_fdb_clusters = 0;
|
||||
args.num_databases = 1;
|
||||
args.api_version = maxApiVersion();
|
||||
args.json = 0;
|
||||
args.num_processes = 1;
|
||||
args.num_threads = 1;
|
||||
args.async_xacts = 0;
|
||||
args.mode = MODE_INVALID;
|
||||
args.rows = 100000;
|
||||
args.load_factor = 1.0;
|
||||
args.row_digits = digits(args.rows);
|
||||
args.seconds = 30;
|
||||
args.iteration = 0;
|
||||
args.tpsmax = 0;
|
||||
args.tpsmin = -1;
|
||||
args.tpsinterval = 10;
|
||||
args.tpschange = TPS_SIN;
|
||||
args.sampling = 1000;
|
||||
args.key_length = 32;
|
||||
args.value_length = 16;
|
||||
args.active_tenants = 0;
|
||||
args.total_tenants = 0;
|
||||
args.tenant_batch_size = 10000;
|
||||
args.zipf = 0;
|
||||
args.commit_get = 0;
|
||||
args.verbose = 1;
|
||||
args.flatbuffers = 0; /* internal */
|
||||
args.knobs[0] = '\0';
|
||||
args.log_group[0] = '\0';
|
||||
args.prefixpadding = 0;
|
||||
args.trace = 0;
|
||||
args.tracepath[0] = '\0';
|
||||
args.traceformat = 0; /* default to client's default (XML) */
|
||||
args.streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
|
||||
args.txntrace = 0;
|
||||
args.txntagging = 0;
|
||||
memset(args.txntagging_prefix, 0, TAGPREFIXLENGTH_MAX);
|
||||
Arguments::Arguments() {
|
||||
num_fdb_clusters = 0;
|
||||
num_databases = 1;
|
||||
api_version = maxApiVersion();
|
||||
json = 0;
|
||||
num_processes = 1;
|
||||
num_threads = 1;
|
||||
async_xacts = 0;
|
||||
mode = MODE_INVALID;
|
||||
rows = 100000;
|
||||
load_factor = 1.0;
|
||||
row_digits = digits(rows);
|
||||
seconds = 30;
|
||||
iteration = 0;
|
||||
tpsmax = 0;
|
||||
tpsmin = -1;
|
||||
tpsinterval = 10;
|
||||
tpschange = TPS_SIN;
|
||||
sampling = 1000;
|
||||
key_length = 32;
|
||||
value_length = 16;
|
||||
active_tenants = 0;
|
||||
total_tenants = 0;
|
||||
tenant_batch_size = 10000;
|
||||
zipf = 0;
|
||||
commit_get = 0;
|
||||
verbose = 1;
|
||||
flatbuffers = 0; /* internal */
|
||||
knobs[0] = '\0';
|
||||
log_group[0] = '\0';
|
||||
prefixpadding = 0;
|
||||
trace = 0;
|
||||
tracepath[0] = '\0';
|
||||
traceformat = 0; /* default to client's default (XML) */
|
||||
streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
|
||||
txntrace = 0;
|
||||
txntagging = 0;
|
||||
memset(txntagging_prefix, 0, TAGPREFIXLENGTH_MAX);
|
||||
for (auto i = 0; i < MAX_OP; i++) {
|
||||
args.txnspec.ops[i][OP_COUNT] = 0;
|
||||
txnspec.ops[i][OP_COUNT] = 0;
|
||||
}
|
||||
args.client_threads_per_version = 0;
|
||||
args.disable_client_bypass = false;
|
||||
args.disable_ryw = 0;
|
||||
args.json_output_path[0] = '\0';
|
||||
args.stats_export_path[0] = '\0';
|
||||
args.bg_materialize_files = false;
|
||||
args.bg_file_path[0] = '\0';
|
||||
args.distributed_tracer_client = 0;
|
||||
return 0;
|
||||
client_threads_per_version = 0;
|
||||
disable_client_bypass = false;
|
||||
disable_ryw = 0;
|
||||
json_output_path[0] = '\0';
|
||||
stats_export_path[0] = '\0';
|
||||
bg_materialize_files = false;
|
||||
bg_file_path[0] = '\0';
|
||||
distributed_tracer_client = 0;
|
||||
}
|
||||
|
||||
/* parse transaction specification */
|
||||
|
@ -1279,6 +1306,10 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
{ "bg_file_path", required_argument, NULL, ARG_BG_FILE_PATH },
|
||||
{ "stats_export_path", optional_argument, NULL, ARG_EXPORT_PATH },
|
||||
{ "distributed_tracer_client", required_argument, NULL, ARG_DISTRIBUTED_TRACER_CLIENT },
|
||||
{ "tls_certificate_file", required_argument, NULL, ARG_TLS_CERTIFICATE_FILE },
|
||||
{ "tls_key_file", required_argument, NULL, ARG_TLS_KEY_FILE },
|
||||
{ "tls_ca_file", required_argument, NULL, ARG_TLS_CA_FILE },
|
||||
{ "authorization_token_file", required_argument, NULL, ARG_AUTHORIZATION_TOKEN_FILE },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
idx = 0;
|
||||
|
@ -1515,6 +1546,45 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
args.distributed_tracer_client = -1;
|
||||
}
|
||||
break;
|
||||
case ARG_TLS_CERTIFICATE_FILE:
|
||||
args.tls_certificate_file = std::string(optarg);
|
||||
break;
|
||||
case ARG_TLS_KEY_FILE:
|
||||
args.tls_key_file = std::string(optarg);
|
||||
break;
|
||||
case ARG_TLS_CA_FILE:
|
||||
args.tls_ca_file = std::string(optarg);
|
||||
break;
|
||||
case ARG_AUTHORIZATION_TOKEN_FILE: {
|
||||
std::string tokenFilename(optarg);
|
||||
std::ifstream ifs(tokenFilename);
|
||||
std::ostringstream oss;
|
||||
oss << ifs.rdbuf();
|
||||
rapidjson::Document d;
|
||||
d.Parse(oss.str().c_str());
|
||||
if (d.HasParseError()) {
|
||||
logr.error("Failed to parse authorization token JSON file '{}': {} at offset {}",
|
||||
tokenFilename,
|
||||
GetParseError_En(d.GetParseError()),
|
||||
d.GetErrorOffset());
|
||||
return -1;
|
||||
} else if (!d.IsObject()) {
|
||||
logr.error("Authorization token JSON file '{}' must contain a JSON object", tokenFilename);
|
||||
return -1;
|
||||
}
|
||||
for (auto itr = d.MemberBegin(); itr != d.MemberEnd(); ++itr) {
|
||||
if (!itr->value.IsString()) {
|
||||
logr.error("Token '{}' is not a string", itr->name.GetString());
|
||||
return -1;
|
||||
}
|
||||
args.authorization_tokens.insert_or_assign(
|
||||
std::string(itr->name.GetString(), itr->name.GetStringLength()),
|
||||
std::string(itr->value.GetString(), itr->value.GetStringLength()));
|
||||
}
|
||||
logr.info("Added {} tenant authorization tokens to map from file '{}'",
|
||||
args.authorization_tokens.size(),
|
||||
tokenFilename);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1525,93 +1595,97 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int validateArguments(Arguments const& args) {
|
||||
if (args.mode == MODE_INVALID) {
|
||||
int Arguments::validate() {
|
||||
if (mode == MODE_INVALID) {
|
||||
logr.error("--mode has to be set");
|
||||
return -1;
|
||||
}
|
||||
if (args.verbose < VERBOSE_NONE || args.verbose > VERBOSE_DEBUG) {
|
||||
if (verbose < VERBOSE_NONE || verbose > VERBOSE_DEBUG) {
|
||||
logr.error("--verbose must be between 0 and 3");
|
||||
return -1;
|
||||
}
|
||||
if (args.rows <= 0) {
|
||||
if (rows <= 0) {
|
||||
logr.error("--rows must be a positive integer");
|
||||
return -1;
|
||||
}
|
||||
if (args.load_factor <= 0 || args.load_factor > 1) {
|
||||
if (load_factor <= 0 || load_factor > 1) {
|
||||
logr.error("--load_factor must be in range (0, 1]");
|
||||
return -1;
|
||||
}
|
||||
if (args.key_length < 0) {
|
||||
if (key_length < 0) {
|
||||
logr.error("--keylen must be a positive integer");
|
||||
return -1;
|
||||
}
|
||||
if (args.value_length < 0) {
|
||||
if (value_length < 0) {
|
||||
logr.error("--vallen must be a positive integer");
|
||||
return -1;
|
||||
}
|
||||
if (args.num_fdb_clusters > NUM_CLUSTERS_MAX) {
|
||||
if (num_fdb_clusters > NUM_CLUSTERS_MAX) {
|
||||
logr.error("Mako is not supported to do work to more than {} clusters", NUM_CLUSTERS_MAX);
|
||||
return -1;
|
||||
}
|
||||
if (args.num_databases > NUM_DATABASES_MAX) {
|
||||
if (num_databases > NUM_DATABASES_MAX) {
|
||||
logr.error("Mako is not supported to do work to more than {} databases", NUM_DATABASES_MAX);
|
||||
return -1;
|
||||
}
|
||||
if (args.num_databases < args.num_fdb_clusters) {
|
||||
logr.error("--num_databases ({}) must be >= number of clusters({})", args.num_databases, args.num_fdb_clusters);
|
||||
if (num_databases < num_fdb_clusters) {
|
||||
logr.error("--num_databases ({}) must be >= number of clusters({})", num_databases, num_fdb_clusters);
|
||||
return -1;
|
||||
}
|
||||
if (args.num_threads < args.num_databases) {
|
||||
logr.error("--threads ({}) must be >= number of databases ({})", args.num_threads, args.num_databases);
|
||||
if (num_threads < num_databases) {
|
||||
logr.error("--threads ({}) must be >= number of databases ({})", num_threads, num_databases);
|
||||
return -1;
|
||||
}
|
||||
if (args.key_length < 4 /* "mako" */ + args.row_digits) {
|
||||
if (key_length < 4 /* "mako" */ + row_digits) {
|
||||
logr.error("--keylen must be larger than {} to store \"mako\" prefix "
|
||||
"and maximum row number",
|
||||
4 + args.row_digits);
|
||||
4 + row_digits);
|
||||
return -1;
|
||||
}
|
||||
if (args.active_tenants > args.total_tenants) {
|
||||
if (active_tenants > total_tenants) {
|
||||
logr.error("--active_tenants must be less than or equal to --total_tenants");
|
||||
return -1;
|
||||
}
|
||||
if (args.tenant_batch_size < 1) {
|
||||
if (tenant_batch_size < 1) {
|
||||
logr.error("--tenant_batch_size must be at least 1");
|
||||
return -1;
|
||||
}
|
||||
if (args.mode == MODE_RUN) {
|
||||
if ((args.seconds > 0) && (args.iteration > 0)) {
|
||||
if (mode == MODE_RUN) {
|
||||
if ((seconds > 0) && (iteration > 0)) {
|
||||
logr.error("Cannot specify seconds and iteration together");
|
||||
return -1;
|
||||
}
|
||||
if ((args.seconds == 0) && (args.iteration == 0)) {
|
||||
if ((seconds == 0) && (iteration == 0)) {
|
||||
logr.error("Must specify either seconds or iteration");
|
||||
return -1;
|
||||
}
|
||||
if (args.txntagging < 0) {
|
||||
if (txntagging < 0) {
|
||||
logr.error("--txntagging must be a non-negative integer");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// ensure that all of the files provided to mako are valid and exist
|
||||
if (args.mode == MODE_REPORT) {
|
||||
if (!args.num_report_files) {
|
||||
if (mode == MODE_REPORT) {
|
||||
if (!num_report_files) {
|
||||
logr.error("No files to merge");
|
||||
}
|
||||
for (int i = 0; i < args.num_report_files; i++) {
|
||||
for (int i = 0; i < num_report_files; i++) {
|
||||
struct stat buffer;
|
||||
if (stat(args.report_files[i], &buffer) != 0) {
|
||||
logr.error("Couldn't open file {}", args.report_files[i]);
|
||||
if (stat(report_files[i], &buffer) != 0) {
|
||||
logr.error("Couldn't open file {}", report_files[i]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (args.distributed_tracer_client < 0) {
|
||||
logr.error("--disibuted_tracer_client must specify either (disabled, network_lossy, log_file)");
|
||||
if (distributed_tracer_client < 0) {
|
||||
logr.error("--distributed_tracer_client must specify either (disabled, network_lossy, log_file)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!authorization_tokens.empty() && !tls_ca_file.has_value()) {
|
||||
logr.warn("Authorization tokens are being used without explicit TLS CA file configured");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2262,11 +2336,6 @@ int main(int argc, char* argv[]) {
|
|||
|
||||
auto rc = int{};
|
||||
auto args = Arguments{};
|
||||
rc = initArguments(args);
|
||||
if (rc < 0) {
|
||||
logr.error("initArguments failed");
|
||||
return -1;
|
||||
}
|
||||
rc = parseArguments(argc, argv, args);
|
||||
if (rc < 0) {
|
||||
/* usage printed */
|
||||
|
@ -2282,7 +2351,7 @@ int main(int argc, char* argv[]) {
|
|||
args.total_tenants = args.active_tenants;
|
||||
}
|
||||
|
||||
rc = validateArguments(args);
|
||||
rc = args.validate();
|
||||
if (rc < 0)
|
||||
return -1;
|
||||
logr.setVerbosity(args.verbose);
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string_view>
|
||||
#include <fdb_api.hpp>
|
||||
|
@ -79,7 +80,11 @@ enum ArgKind {
|
|||
ARG_JSON_REPORT,
|
||||
ARG_BG_FILE_PATH, // if blob granule files are stored locally, mako will read and materialize them if this is set
|
||||
ARG_EXPORT_PATH,
|
||||
ARG_DISTRIBUTED_TRACER_CLIENT
|
||||
ARG_DISTRIBUTED_TRACER_CLIENT,
|
||||
ARG_TLS_CERTIFICATE_FILE,
|
||||
ARG_TLS_KEY_FILE,
|
||||
ARG_TLS_CA_FILE,
|
||||
ARG_AUTHORIZATION_TOKEN_FILE,
|
||||
};
|
||||
|
||||
constexpr const int OP_COUNT = 0;
|
||||
|
@ -131,6 +136,9 @@ constexpr const int MAX_REPORT_FILES = 200;
|
|||
|
||||
/* benchmark parameters */
|
||||
struct Arguments {
|
||||
Arguments();
|
||||
int validate();
|
||||
|
||||
int api_version;
|
||||
int json;
|
||||
int num_processes;
|
||||
|
@ -180,6 +188,10 @@ struct Arguments {
|
|||
char report_files[MAX_REPORT_FILES][PATH_MAX];
|
||||
int num_report_files;
|
||||
int distributed_tracer_client;
|
||||
std::optional<std::string> tls_certificate_file;
|
||||
std::optional<std::string> tls_key_file;
|
||||
std::optional<std::string> tls_ca_file;
|
||||
std::map<std::string, std::string> authorization_tokens; // maps tenant name to token string
|
||||
};
|
||||
|
||||
} // namespace mako
|
||||
|
|
|
@ -38,7 +38,7 @@ Arguments
|
|||
| - ``build``: Populate data
|
||||
| - ``run``: Run the benchmark
|
||||
|
||||
- | ``-c | --cluster <cluster file>``
|
||||
- | ``-c | --cluster <cluster_file>``
|
||||
| FDB cluster files (Required, comma-separated)
|
||||
|
||||
- | ``-d | --num_databases <num_databases>``
|
||||
|
@ -125,9 +125,21 @@ Arguments
|
|||
| Disable snapshot read-your-writes
|
||||
|
||||
- | ``--json_report`` defaults to ``mako.json``
|
||||
| ``--json_report=PATH``
|
||||
| ``--json_report <path>``
|
||||
| Output stats to the specified json file
|
||||
|
||||
- | ``--tls_certificate_file <path>``
|
||||
| Use TLS certificate located in ``<path>``
|
||||
|
||||
- | ``--tls_key_file <path>``
|
||||
| Use TLS key file located in ``<path>``
|
||||
|
||||
- | ``--tls_ca_file <path>``
|
||||
| Use TLS CA file located in ``<path>``
|
||||
|
||||
- | ``--authorization_token_file <path>``
|
||||
| Use authorization token JSON file located in ``<path>``
|
||||
| Expected content is a JSON object where each key is a tenant name and the mapped value is a token string
|
||||
|
||||
Transaction Specification
|
||||
=========================
|
||||
|
|
|
@ -76,38 +76,11 @@ function(generate_coverage_xml)
|
|||
add_dependencies(coverage_${target_name} coveragetool)
|
||||
endfunction()
|
||||
|
||||
# This function asserts that `versions.h` does not exist in the source
|
||||
# directory. It does this in the prebuild phase of the target.
|
||||
# This is an ugly hack that should make sure that cmake isn't used with
|
||||
# a source directory in which FDB was previously built with `make`.
|
||||
function(assert_no_version_h target)
|
||||
|
||||
message(STATUS "Check versions.h on ${target}")
|
||||
set(target_name "${target}_versions_h_check")
|
||||
|
||||
if (DEFINED ENV{VERBOSE})
|
||||
add_custom_target("${target_name}"
|
||||
COMMAND "${CMAKE_COMMAND}" -DFILE="${CMAKE_SOURCE_DIR}/versions.h"
|
||||
-P "${CMAKE_SOURCE_DIR}/cmake/AssertFileDoesntExist.cmake"
|
||||
COMMAND echo
|
||||
"${CMAKE_COMMAND}" -P "${CMAKE_SOURCE_DIR}/cmake/AssertFileDoesntExist.cmake"
|
||||
-DFILE="${CMAKE_SOURCE_DIR}/versions.h"
|
||||
COMMENT "Check old build system wasn't used in source dir")
|
||||
else()
|
||||
add_custom_target("${target_name}"
|
||||
COMMAND "${CMAKE_COMMAND}" -DFILE="${CMAKE_SOURCE_DIR}/versions.h"
|
||||
-P "${CMAKE_SOURCE_DIR}/cmake/AssertFileDoesntExist.cmake"
|
||||
COMMENT "Check old build system wasn't used in source dir")
|
||||
endif()
|
||||
|
||||
add_dependencies(${target} ${target_name})
|
||||
endfunction()
|
||||
|
||||
add_custom_target(strip_targets)
|
||||
add_dependencies(packages strip_targets)
|
||||
|
||||
function(strip_debug_symbols target)
|
||||
if (WIN32)
|
||||
if(WIN32)
|
||||
return()
|
||||
endif()
|
||||
get_target_property(target_type ${target} TYPE)
|
||||
|
@ -146,7 +119,7 @@ function(strip_debug_symbols target)
|
|||
COMMAND objcopy --verbose --only-keep-debug $<TARGET_FILE:${target}> "${out_file}.debug"
|
||||
COMMAND objcopy --verbose --add-gnu-debuglink="${out_file}.debug" "${out_file}"
|
||||
COMMENT "Copy debug symbols to ${out_name}.debug")
|
||||
add_custom_target(strip_${target} DEPENDS "${out_file}.debug")
|
||||
add_custom_target(strip_${target} DEPENDS "${out_file}.debug")
|
||||
else()
|
||||
add_custom_target(strip_${target})
|
||||
add_dependencies(strip_${target} strip_only_${target})
|
||||
|
@ -171,7 +144,7 @@ function(copy_headers)
|
|||
foreach(f IN LISTS CP_SRCS)
|
||||
is_prefix(bd "${CMAKE_CURRENT_BINARY_DIR}" "${f}")
|
||||
is_prefix(sd "${CMAKE_CURRENT_SOURCE_DIR}" "${f}")
|
||||
if (bd OR sd)
|
||||
if(bd OR sd)
|
||||
continue()
|
||||
endif()
|
||||
is_header(hdr "${f}")
|
||||
|
@ -180,7 +153,7 @@ function(copy_headers)
|
|||
endif()
|
||||
get_filename_component(fname ${f} NAME)
|
||||
get_filename_component(dname ${f} DIRECTORY)
|
||||
if (dname)
|
||||
if(dname)
|
||||
make_directory(${incl_dir}/${dname})
|
||||
endif()
|
||||
set(fpath "${incl_dir}/${dname}/${fname}")
|
||||
|
@ -309,9 +282,6 @@ function(add_flow_target)
|
|||
|
||||
add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files})
|
||||
add_dependencies(${AFT_NAME} ${AFT_NAME}_actors)
|
||||
if(NOT WIN32)
|
||||
assert_no_version_h(${AFT_NAME}_actors)
|
||||
endif()
|
||||
generate_coverage_xml(${AFT_NAME})
|
||||
if(strip_target)
|
||||
strip_debug_symbols(${AFT_NAME})
|
||||
|
|
|
@ -8,40 +8,43 @@ endif()
|
|||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(awssdk_project
|
||||
GIT_REPOSITORY https://github.com/aws/aws-sdk-cpp.git
|
||||
GIT_TAG e4b4b310d8631bc7e9a797b6ac03a73c6f210bf6 # v1.9.331
|
||||
SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-src"
|
||||
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build"
|
||||
GIT_CONFIG advice.detachedHead=false
|
||||
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF # SDK builds shared libs by default, we want static libs
|
||||
-DENABLE_TESTING=OFF
|
||||
-DBUILD_ONLY=core # git repo contains SDK for every AWS product, we only want the core auth libraries
|
||||
-DSIMPLE_INSTALL=ON
|
||||
-DCMAKE_INSTALL_PREFIX=install # need to specify an install prefix so it doesn't install in /usr/lib - FIXME: use absolute path
|
||||
-DBYO_CRYPTO=ON # we have our own crypto libraries that conflict if we let aws sdk build and link its own
|
||||
-DBUILD_CURL=ON
|
||||
-DBUILD_ZLIB=ON
|
||||
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_CXX_FLAGS=${AWSSDK_COMPILER_FLAGS}
|
||||
TEST_COMMAND ""
|
||||
GIT_REPOSITORY https://github.com/aws/aws-sdk-cpp.git
|
||||
GIT_TAG e4b4b310d8631bc7e9a797b6ac03a73c6f210bf6 # v1.9.331
|
||||
SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-src"
|
||||
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build"
|
||||
GIT_CONFIG advice.detachedHead=false
|
||||
# it seems advice.detachedHead breaks something which causes aws sdk to always be rebuilt.
|
||||
# This option forces to cmake to build the aws sdk only once and never attempt to update it
|
||||
UPDATE_DISCONNECTED ON
|
||||
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF # SDK builds shared libs by default, we want static libs
|
||||
-DENABLE_TESTING=OFF
|
||||
-DBUILD_ONLY=core # git repo contains SDK for every AWS product, we only want the core auth libraries
|
||||
-DSIMPLE_INSTALL=ON
|
||||
-DCMAKE_INSTALL_PREFIX=install # need to specify an install prefix so it doesn't install in /usr/lib - FIXME: use absolute path
|
||||
-DBYO_CRYPTO=ON # we have our own crypto libraries that conflict if we let aws sdk build and link its own
|
||||
-DBUILD_CURL=ON
|
||||
-DBUILD_ZLIB=ON
|
||||
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_CXX_FLAGS=${AWSSDK_COMPILER_FLAGS}
|
||||
TEST_COMMAND ""
|
||||
# the sdk build produces a ton of artifacts, with their own dependency tree, so there is a very specific dependency order they must be linked in
|
||||
BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-cpp-sdk-core.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-crt-cpp.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-s3.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-auth.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-event-stream.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-http.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-mqtt.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-sdkutils.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-io.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-checksums.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-compression.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-cal.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-common.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/curl/lib/libcurl.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/zlib/lib/libz.a"
|
||||
)
|
||||
BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-cpp-sdk-core.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-crt-cpp.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-s3.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-auth.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-event-stream.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-http.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-mqtt.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-sdkutils.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-io.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-checksums.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-compression.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-cal.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-common.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/curl/lib/libcurl.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/zlib/lib/libz.a"
|
||||
)
|
||||
|
||||
add_library(awssdk_core STATIC IMPORTED)
|
||||
add_dependencies(awssdk_core awssdk_project)
|
||||
|
|
|
@ -159,13 +159,20 @@ class Parser:
|
|||
pass
|
||||
|
||||
|
||||
class XmlParser(Parser, xml.sax.handler.ContentHandler):
|
||||
class XmlParser(Parser, xml.sax.handler.ContentHandler, xml.sax.handler.ErrorHandler):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.handler: ParseHandler | None = None
|
||||
|
||||
def parse(self, file: TextIO, handler: ParseHandler) -> None:
|
||||
xml.sax.parse(file, self)
|
||||
self.handler = handler
|
||||
xml.sax.parse(file, self, errorHandler=self)
|
||||
|
||||
def error(self, exception):
|
||||
pass
|
||||
|
||||
def fatalError(self, exception):
|
||||
pass
|
||||
|
||||
def startElement(self, name, attrs) -> None:
|
||||
attributes: Dict[str, str] = {}
|
||||
|
@ -276,6 +283,7 @@ class TraceFiles:
|
|||
raise StopIteration
|
||||
self.current += 1
|
||||
return self.trace_files[self.current - 1]
|
||||
|
||||
return TraceFilesIterator(self)
|
||||
|
||||
|
||||
|
@ -426,7 +434,8 @@ class Summary:
|
|||
lines = self.error_out.splitlines()
|
||||
stderr_bytes = 0
|
||||
for line in lines:
|
||||
if line.endswith("WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases!"):
|
||||
if line.endswith(
|
||||
"WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases!"):
|
||||
# When running ASAN we expect to see this message. Boost coroutine should be using the correct asan annotations so that it shouldn't produce any false positives.
|
||||
continue
|
||||
if line.endswith("Warning: unimplemented fcntl command: 1036"):
|
||||
|
@ -560,6 +569,9 @@ class Summary:
|
|||
self.handler.add_handler(('Severity', '30'), parse_warning)
|
||||
|
||||
def parse_error(attrs: Dict[str, str]):
|
||||
if 'ErrorIsInjectedFault' in attrs and attrs['ErrorIsInjectedFault'].lower() in ['1', 'true']:
|
||||
# ignore injected errors. In newer fdb versions these will have a lower severity
|
||||
return
|
||||
self.errors += 1
|
||||
self.error = True
|
||||
if self.errors > config.max_errors:
|
||||
|
@ -606,6 +618,7 @@ class Summary:
|
|||
child.attributes['File'] = attrs['File']
|
||||
child.attributes['Line'] = attrs['Line']
|
||||
self.out.append(child)
|
||||
|
||||
self.handler.add_handler(('Type', 'BuggifySection'), buggify_section)
|
||||
self.handler.add_handler(('Type', 'FaultInjected'), buggify_section)
|
||||
|
||||
|
@ -614,9 +627,11 @@ class Summary:
|
|||
child.attributes['Name'] = attrs['Name']
|
||||
child.attributes['File'] = attrs['File']
|
||||
child.attributes['Line'] = attrs['Line']
|
||||
|
||||
self.handler.add_handler(('Type', 'RunningUnitTest'), running_unit_test)
|
||||
|
||||
def stderr_severity(attrs: Dict[str, str]):
|
||||
if 'NewSeverity' in attrs:
|
||||
self.stderr_severity = attrs['NewSeverity']
|
||||
|
||||
self.handler.add_handler(('Type', 'StderrSeverity'), stderr_severity)
|
||||
|
|
|
@ -2365,6 +2365,7 @@ ACTOR Future<Void> runRestore(Database db,
|
|||
KeyRef(addPrefix),
|
||||
KeyRef(removePrefix),
|
||||
LockDB::True,
|
||||
UnlockDB::True,
|
||||
onlyApplyMutationLogs,
|
||||
inconsistentSnapshotOnly,
|
||||
beginVersion,
|
||||
|
|
|
@ -83,7 +83,7 @@ BlobCipherMetrics::BlobCipherMetrics()
|
|||
CounterSet(cc, "Backup"),
|
||||
CounterSet(cc, "Test") }) {
|
||||
specialCounter(cc, "CacheSize", []() { return BlobCipherKeyCache::getInstance()->getSize(); });
|
||||
traceFuture = traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, &cc);
|
||||
traceFuture = cc.traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL);
|
||||
}
|
||||
|
||||
std::string toString(BlobCipherMetrics::UsageType type) {
|
||||
|
|
|
@ -142,7 +142,6 @@ bool isRangeFullyCovered(KeyRange range, Standalone<VectorRef<BlobGranuleChunkRe
|
|||
for (const BlobGranuleChunkRef& chunk : blobChunks) {
|
||||
blobRanges.push_back(chunk.keyRange);
|
||||
}
|
||||
|
||||
return range.isCovered(blobRanges);
|
||||
}
|
||||
|
||||
|
@ -194,7 +193,7 @@ TEST_CASE("/fdbserver/blobgranule/isRangeCoveredByBlob") {
|
|||
testAddChunkRange("key_a1"_sr, "key_a9"_sr, continuedChunks);
|
||||
testAddChunkRange("key_a9"_sr, "key_b1"_sr, continuedChunks);
|
||||
testAddChunkRange("key_b1"_sr, "key_b9"_sr, continuedChunks);
|
||||
ASSERT(isRangeFullyCovered(KeyRangeRef("key_a1"_sr, "key_b9"_sr), continuedChunks) == false);
|
||||
ASSERT(isRangeFullyCovered(KeyRangeRef("key_a1"_sr, "key_b9"_sr), continuedChunks));
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -272,6 +272,7 @@ void ClientKnobs::initialize(Randomize randomize) {
|
|||
init( TAG_THROTTLE_EXPIRATION_INTERVAL, 60.0 ); if( randomize && BUGGIFY ) TAG_THROTTLE_EXPIRATION_INTERVAL = 1.0;
|
||||
init( WRITE_COST_BYTE_FACTOR, 16384 ); if( randomize && BUGGIFY ) WRITE_COST_BYTE_FACTOR = 4096;
|
||||
init( READ_COST_BYTE_FACTOR, 16384 ); if( randomize && BUGGIFY ) READ_COST_BYTE_FACTOR = 4096;
|
||||
init( PROXY_MAX_TAG_THROTTLE_DURATION, 5.0 ); if( randomize && BUGGIFY ) PROXY_MAX_TAG_THROTTLE_DURATION = 0.5;
|
||||
|
||||
// busyness reporting
|
||||
init( BUSYNESS_SPIKE_START_THRESHOLD, 0.100 );
|
||||
|
|
|
@ -22,6 +22,16 @@
|
|||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
|
||||
KeyRangeRef toPrefixRelativeRange(KeyRangeRef range, KeyRef prefix) {
|
||||
if (prefix.empty()) {
|
||||
return range;
|
||||
} else {
|
||||
KeyRef begin = range.begin.startsWith(prefix) ? range.begin.removePrefix(prefix) : allKeys.begin;
|
||||
KeyRef end = range.end.startsWith(prefix) ? range.end.removePrefix(prefix) : allKeys.end;
|
||||
return KeyRangeRef(begin, end);
|
||||
}
|
||||
}
|
||||
|
||||
KeyRef keyBetween(const KeyRangeRef& keys) {
|
||||
int pos = 0; // will be the position of the first difference between keys.begin and keys.end
|
||||
int minSize = std::min(keys.begin.size(), keys.end.size());
|
||||
|
|
|
@ -167,6 +167,7 @@ public:
|
|||
KeyBackedProperty<Key> removePrefix() { return configSpace.pack(__FUNCTION__sr); }
|
||||
KeyBackedProperty<bool> onlyApplyMutationLogs() { return configSpace.pack(__FUNCTION__sr); }
|
||||
KeyBackedProperty<bool> inconsistentSnapshotOnly() { return configSpace.pack(__FUNCTION__sr); }
|
||||
KeyBackedProperty<bool> unlockDBAfterRestore() { return configSpace.pack(__FUNCTION__sr); }
|
||||
// XXX: Remove restoreRange() once it is safe to remove. It has been changed to restoreRanges
|
||||
KeyBackedProperty<KeyRange> restoreRange() { return configSpace.pack(__FUNCTION__sr); }
|
||||
KeyBackedProperty<std::vector<KeyRange>> restoreRanges() { return configSpace.pack(__FUNCTION__sr); }
|
||||
|
@ -591,12 +592,11 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
}
|
||||
|
||||
ACTOR static Future<StringRef> decryptImpl(Database cx,
|
||||
StringRef headerS,
|
||||
BlobCipherEncryptHeader header,
|
||||
const uint8_t* dataP,
|
||||
int64_t dataLen,
|
||||
Arena* arena) {
|
||||
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
|
||||
state BlobCipherEncryptHeader header = BlobCipherEncryptHeader::fromStringRef(headerS);
|
||||
TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::BACKUP));
|
||||
ASSERT(cipherKeys.cipherHeaderKey.isValid() && cipherKeys.cipherTextKey.isValid());
|
||||
validateEncryptionHeader(cipherKeys.cipherHeaderKey, cipherKeys.cipherTextKey, header);
|
||||
|
@ -606,7 +606,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
}
|
||||
|
||||
static Future<StringRef> decrypt(Database cx,
|
||||
StringRef headerS,
|
||||
BlobCipherEncryptHeader headerS,
|
||||
const uint8_t* dataP,
|
||||
int64_t dataLen,
|
||||
Arena* arena) {
|
||||
|
@ -651,7 +651,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
}
|
||||
|
||||
ACTOR static Future<Void> updateEncryptionKeysCtx(EncryptedRangeFileWriter* self, KeyRef key) {
|
||||
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, self));
|
||||
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, self->tenantCache));
|
||||
state Reference<AsyncVar<ClientDBInfo> const> dbInfo = self->cx->clientInfo;
|
||||
|
||||
// Get text and header cipher key
|
||||
|
@ -693,12 +693,13 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
|
||||
// True when `key` is non-empty and its first byte matches the first byte of systemKeys.begin.
static bool isSystemKey(KeyRef key) {
	if (key.size() == 0) {
		return false;
	}
	return key[0] == systemKeys.begin[0];
}
|
||||
|
||||
ACTOR static Future<std::pair<int64_t, TenantName>>
|
||||
getEncryptionDomainDetailsImpl(KeyRef key, Reference<TenantEntryCache<Void>> tenantCache, bool useTenantCache) {
|
||||
ACTOR static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetailsImpl(
|
||||
KeyRef key,
|
||||
Reference<TenantEntryCache<Void>> tenantCache) {
|
||||
if (isSystemKey(key)) {
|
||||
return std::make_pair(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME);
|
||||
}
|
||||
if (key.size() < TENANT_PREFIX_SIZE || !useTenantCache) {
|
||||
if (key.size() < TENANT_PREFIX_SIZE) {
|
||||
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
|
||||
}
|
||||
KeyRef tenantPrefix = KeyRef(key.begin(), TENANT_PREFIX_SIZE);
|
||||
|
@ -710,21 +711,10 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
|
||||
}
|
||||
|
||||
static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(KeyRef key,
|
||||
EncryptedRangeFileWriter* self) {
|
||||
// If tenants are disabled on a cluster then don't use the TenantEntryCache as it will result in a lot of
|
||||
// unnecessary cache misses. For a cluster configured in TenantMode::Optional, the backup performance may
|
||||
// degrade if most of the mutations belong to an invalid tenant
|
||||
TenantMode mode = self->cx->clientInfo->get().tenantMode;
|
||||
bool useTenantCache = mode != TenantMode::DISABLED;
|
||||
if (g_network->isSimulated() && mode == TenantMode::OPTIONAL_TENANT) {
|
||||
// TODO: Currently simulation tests run with optional tenant mode but most data does not belong to any
|
||||
// tenant. This results in many timeouts so disable using the tenant cache until optional tenant mode
|
||||
// support with backups is more performant
|
||||
useTenantCache = false;
|
||||
}
|
||||
CODE_PROBE(useTenantCache, "using tenant cache");
|
||||
return getEncryptionDomainDetailsImpl(key, self->tenantCache, useTenantCache);
|
||||
static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(
|
||||
KeyRef key,
|
||||
Reference<TenantEntryCache<Void>> tenantCache) {
|
||||
return getEncryptionDomainDetailsImpl(key, tenantCache);
|
||||
}
|
||||
|
||||
// Handles the first block and internal blocks. Ends current block if needed.
|
||||
|
@ -816,6 +806,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
curKeyTenantInfo.first != FDB_DEFAULT_ENCRYPT_DOMAIN_ID) {
|
||||
endKey = StringRef(k.begin(), TENANT_PREFIX_SIZE);
|
||||
}
|
||||
|
||||
state ValueRef newValue = StringRef();
|
||||
self->lastKey = k;
|
||||
self->lastValue = v;
|
||||
|
@ -834,9 +825,9 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
if (self->lastKey.size() == 0 || k.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self));
|
||||
state std::pair<int64_t, TenantName> prevKeyTenantInfo = wait(getEncryptionDomainDetails(self->lastKey, self));
|
||||
// crossing tenant boundaries so finish the current block using only the tenant prefix of the new key
|
||||
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self->tenantCache));
|
||||
state std::pair<int64_t, TenantName> prevKeyTenantInfo =
|
||||
wait(getEncryptionDomainDetails(self->lastKey, self->tenantCache));
|
||||
if (curKeyTenantInfo.first != prevKeyTenantInfo.first) {
|
||||
CODE_PROBE(true, "crossed tenant boundaries");
|
||||
wait(handleTenantBondary(self, k, v, writeValue, curKeyTenantInfo));
|
||||
|
@ -1040,11 +1031,18 @@ private:
|
|||
Key lastValue;
|
||||
};
|
||||
|
||||
void decodeKVPairs(StringRefReader* reader, Standalone<VectorRef<KeyValueRef>>* results) {
|
||||
ACTOR static Future<Void> decodeKVPairs(StringRefReader* reader,
|
||||
Standalone<VectorRef<KeyValueRef>>* results,
|
||||
bool encryptedBlock,
|
||||
Optional<Reference<TenantEntryCache<Void>>> tenantCache,
|
||||
Optional<BlobCipherEncryptHeader> encryptHeader) {
|
||||
// Read begin key, if this fails then block was invalid.
|
||||
uint32_t kLen = reader->consumeNetworkUInt32();
|
||||
const uint8_t* k = reader->consume(kLen);
|
||||
state uint32_t kLen = reader->consumeNetworkUInt32();
|
||||
state const uint8_t* k = reader->consume(kLen);
|
||||
results->push_back(results->arena(), KeyValueRef(KeyRef(k, kLen), ValueRef()));
|
||||
state KeyRef prevKey = KeyRef(k, kLen);
|
||||
state bool done = false;
|
||||
state Optional<std::pair<int64_t, TenantName>> prevTenantInfo;
|
||||
|
||||
// Read kv pairs and end key
|
||||
while (1) {
|
||||
|
@ -1052,6 +1050,35 @@ void decodeKVPairs(StringRefReader* reader, Standalone<VectorRef<KeyValueRef>>*
|
|||
kLen = reader->consumeNetworkUInt32();
|
||||
k = reader->consume(kLen);
|
||||
|
||||
// make sure that all keys in a block belong to exactly one tenant,
|
||||
// unless it's the last key in which case it can be a truncated (different) tenant prefix
|
||||
if (encryptedBlock && g_network && g_network->isSimulated()) {
|
||||
ASSERT(tenantCache.present());
|
||||
ASSERT(encryptHeader.present());
|
||||
state KeyRef curKey = KeyRef(k, kLen);
|
||||
if (!prevTenantInfo.present()) {
|
||||
std::pair<int64_t, TenantName> tenantInfo =
|
||||
wait(EncryptedRangeFileWriter::getEncryptionDomainDetails(prevKey, tenantCache.get()));
|
||||
prevTenantInfo = tenantInfo;
|
||||
}
|
||||
std::pair<int64_t, TenantName> curTenantInfo =
|
||||
wait(EncryptedRangeFileWriter::getEncryptionDomainDetails(curKey, tenantCache.get()));
|
||||
if (!curKey.empty() && !prevKey.empty() && prevTenantInfo.get().first != curTenantInfo.first) {
|
||||
ASSERT(!done);
|
||||
if (curTenantInfo.first != SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID &&
|
||||
curTenantInfo.first != FDB_DEFAULT_ENCRYPT_DOMAIN_ID) {
|
||||
ASSERT(curKey.size() == TENANT_PREFIX_SIZE);
|
||||
}
|
||||
done = true;
|
||||
}
|
||||
// make sure that all keys (except possibly the last key) in a block are encrypted using the correct key
|
||||
if (!prevKey.empty()) {
|
||||
ASSERT(prevTenantInfo.get().first == encryptHeader.get().cipherTextDetails.encryptDomainId);
|
||||
}
|
||||
prevKey = curKey;
|
||||
prevTenantInfo = curTenantInfo;
|
||||
}
|
||||
|
||||
// If eof reached or first value len byte is 0xFF then a valid block end was reached.
|
||||
if (reader->eof() || *reader->rptr == 0xFF) {
|
||||
results->push_back(results->arena(), KeyValueRef(KeyRef(k, kLen), ValueRef()));
|
||||
|
@ -1072,6 +1099,8 @@ void decodeKVPairs(StringRefReader* reader, Standalone<VectorRef<KeyValueRef>>*
|
|||
for (auto b : reader->remainder())
|
||||
if (b != 0xFF)
|
||||
throw restore_corrupted_data_padding();
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<IAsyncFile> file,
|
||||
|
@ -1094,7 +1123,11 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
|
|||
// BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION
|
||||
int32_t file_version = reader.consume<int32_t>();
|
||||
if (file_version == BACKUP_AGENT_SNAPSHOT_FILE_VERSION) {
|
||||
decodeKVPairs(&reader, &results);
|
||||
wait(decodeKVPairs(&reader,
|
||||
&results,
|
||||
false,
|
||||
Optional<Reference<TenantEntryCache<Void>>>(),
|
||||
Optional<BlobCipherEncryptHeader>()));
|
||||
} else if (file_version == BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION) {
|
||||
CODE_PROBE(true, "decoding encrypted block");
|
||||
ASSERT(cx.present());
|
||||
|
@ -1108,7 +1141,8 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
|
|||
|
||||
// read encryption header
|
||||
const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize);
|
||||
StringRef header = StringRef(headerStart, BlobCipherEncryptHeader::headerSize);
|
||||
StringRef headerS = StringRef(headerStart, BlobCipherEncryptHeader::headerSize);
|
||||
state BlobCipherEncryptHeader header = BlobCipherEncryptHeader::fromStringRef(headerS);
|
||||
const uint8_t* dataPayloadStart = headerStart + BlobCipherEncryptHeader::headerSize;
|
||||
// calculate the total bytes read up to (and including) the header
|
||||
int64_t bytesRead = sizeof(int32_t) + sizeof(uint32_t) + optionsLen + BlobCipherEncryptHeader::headerSize;
|
||||
|
@ -1117,7 +1151,12 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
|
|||
StringRef decryptedData =
|
||||
wait(EncryptedRangeFileWriter::decrypt(cx.get(), header, dataPayloadStart, dataLen, &results.arena()));
|
||||
reader = StringRefReader(decryptedData, restore_corrupted_data());
|
||||
decodeKVPairs(&reader, &results);
|
||||
state Optional<Reference<TenantEntryCache<Void>>> tenantCache;
|
||||
if (g_network && g_simulator->isSimulated()) {
|
||||
tenantCache = makeReference<TenantEntryCache<Void>>(cx.get(), TenantEntryCacheRefreshMode::WATCH);
|
||||
wait(tenantCache.get()->init());
|
||||
}
|
||||
wait(decodeKVPairs(&reader, &results, true, tenantCache, header));
|
||||
} else {
|
||||
throw restore_unsupported_file_version();
|
||||
}
|
||||
|
@ -3398,6 +3437,8 @@ struct RestoreCompleteTaskFunc : RestoreTaskFuncBase {
|
|||
|
||||
state RestoreConfig restore(task);
|
||||
restore.stateEnum().set(tr, ERestoreState::COMPLETED);
|
||||
state bool unlockDB = wait(restore.unlockDBAfterRestore().getD(tr, Snapshot::False, true));
|
||||
|
||||
tr->atomicOp(metadataVersionKey, metadataVersionRequiredValue, MutationRef::SetVersionstampedValue);
|
||||
// Clear the file map now since it could be huge.
|
||||
restore.fileSet().clear(tr);
|
||||
|
@ -3413,7 +3454,9 @@ struct RestoreCompleteTaskFunc : RestoreTaskFuncBase {
|
|||
restore.clearApplyMutationsKeys(tr);
|
||||
|
||||
wait(taskBucket->finish(tr, task));
|
||||
wait(unlockDatabase(tr, restore.getUid()));
|
||||
if (unlockDB) {
|
||||
wait(unlockDatabase(tr, restore.getUid()));
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
@ -5172,6 +5215,7 @@ public:
|
|||
Key addPrefix,
|
||||
Key removePrefix,
|
||||
LockDB lockDB,
|
||||
UnlockDB unlockDB,
|
||||
OnlyApplyMutationLogs onlyApplyMutationLogs,
|
||||
InconsistentSnapshotOnly inconsistentSnapshotOnly,
|
||||
Version beginVersion,
|
||||
|
@ -5245,6 +5289,7 @@ public:
|
|||
restore.onlyApplyMutationLogs().set(tr, onlyApplyMutationLogs);
|
||||
restore.inconsistentSnapshotOnly().set(tr, inconsistentSnapshotOnly);
|
||||
restore.beginVersion().set(tr, beginVersion);
|
||||
restore.unlockDBAfterRestore().set(tr, unlockDB);
|
||||
if (BUGGIFY && restoreRanges.size() == 1) {
|
||||
restore.restoreRange().set(tr, restoreRanges[0]);
|
||||
} else {
|
||||
|
@ -5836,6 +5881,7 @@ public:
|
|||
Key addPrefix,
|
||||
Key removePrefix,
|
||||
LockDB lockDB,
|
||||
UnlockDB unlockDB,
|
||||
OnlyApplyMutationLogs onlyApplyMutationLogs,
|
||||
InconsistentSnapshotOnly inconsistentSnapshotOnly,
|
||||
Version beginVersion,
|
||||
|
@ -5892,6 +5938,7 @@ public:
|
|||
addPrefix,
|
||||
removePrefix,
|
||||
lockDB,
|
||||
unlockDB,
|
||||
onlyApplyMutationLogs,
|
||||
inconsistentSnapshotOnly,
|
||||
beginVersion,
|
||||
|
@ -6017,7 +6064,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
Reference<IBackupContainer> bc = wait(backupConfig.backupContainer().getOrThrow(cx.getReference()));
|
||||
state Reference<IBackupContainer> bc = wait(backupConfig.backupContainer().getOrThrow(cx.getReference()));
|
||||
|
||||
if (fastRestore) {
|
||||
TraceEvent("AtomicParallelRestoreStartRestore").log();
|
||||
|
@ -6043,24 +6090,80 @@ public:
|
|||
return -1;
|
||||
} else {
|
||||
TraceEvent("AS_StartRestore").log();
|
||||
Version ver = wait(restore(backupAgent,
|
||||
cx,
|
||||
cx,
|
||||
tagName,
|
||||
KeyRef(bc->getURL()),
|
||||
bc->getProxy(),
|
||||
ranges,
|
||||
WaitForComplete::True,
|
||||
::invalidVersion,
|
||||
Verbose::True,
|
||||
addPrefix,
|
||||
removePrefix,
|
||||
LockDB::True,
|
||||
OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly::False,
|
||||
::invalidVersion,
|
||||
{},
|
||||
randomUid));
|
||||
state Standalone<VectorRef<KeyRangeRef>> restoreRange;
|
||||
state Standalone<VectorRef<KeyRangeRef>> systemRestoreRange;
|
||||
bool encryptionEnabled = cx->clientInfo->get().isEncryptionEnabled;
|
||||
for (auto r : ranges) {
|
||||
if (!encryptionEnabled || !r.intersects(getSystemBackupRanges())) {
|
||||
restoreRange.push_back_deep(restoreRange.arena(), r);
|
||||
} else {
|
||||
KeyRangeRef normalKeyRange = r & normalKeys;
|
||||
KeyRangeRef systemKeyRange = r & systemKeys;
|
||||
if (!normalKeyRange.empty()) {
|
||||
restoreRange.push_back_deep(restoreRange.arena(), normalKeyRange);
|
||||
}
|
||||
if (!systemKeyRange.empty()) {
|
||||
systemRestoreRange.push_back_deep(systemRestoreRange.arena(), systemKeyRange);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!systemRestoreRange.empty()) {
|
||||
// restore system keys
|
||||
wait(success(restore(backupAgent,
|
||||
cx,
|
||||
cx,
|
||||
"system_restore"_sr,
|
||||
KeyRef(bc->getURL()),
|
||||
bc->getProxy(),
|
||||
systemRestoreRange,
|
||||
WaitForComplete::True,
|
||||
::invalidVersion,
|
||||
Verbose::True,
|
||||
addPrefix,
|
||||
removePrefix,
|
||||
LockDB::True,
|
||||
UnlockDB::False,
|
||||
OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly::False,
|
||||
::invalidVersion,
|
||||
{},
|
||||
randomUid)));
|
||||
state Reference<ReadYourWritesTransaction> rywTransaction =
|
||||
Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(cx));
|
||||
// clear old restore config associated with system keys
|
||||
loop {
|
||||
try {
|
||||
rywTransaction->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
rywTransaction->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
state RestoreConfig oldRestore(randomUid);
|
||||
oldRestore.clear(rywTransaction);
|
||||
wait(rywTransaction->commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(rywTransaction->onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
// restore user data
|
||||
state Version ver = wait(restore(backupAgent,
|
||||
cx,
|
||||
cx,
|
||||
tagName,
|
||||
KeyRef(bc->getURL()),
|
||||
bc->getProxy(),
|
||||
restoreRange,
|
||||
WaitForComplete::True,
|
||||
::invalidVersion,
|
||||
Verbose::True,
|
||||
addPrefix,
|
||||
removePrefix,
|
||||
LockDB::True,
|
||||
UnlockDB::True,
|
||||
OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly::False,
|
||||
::invalidVersion,
|
||||
{},
|
||||
randomUid));
|
||||
return ver;
|
||||
}
|
||||
}
|
||||
|
@ -6120,6 +6223,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
|
|||
Key addPrefix,
|
||||
Key removePrefix,
|
||||
LockDB lockDB,
|
||||
UnlockDB unlockDB,
|
||||
OnlyApplyMutationLogs onlyApplyMutationLogs,
|
||||
InconsistentSnapshotOnly inconsistentSnapshotOnly,
|
||||
Version beginVersion,
|
||||
|
@ -6137,6 +6241,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
|
|||
addPrefix,
|
||||
removePrefix,
|
||||
lockDB,
|
||||
unlockDB,
|
||||
onlyApplyMutationLogs,
|
||||
inconsistentSnapshotOnly,
|
||||
beginVersion,
|
||||
|
@ -6178,6 +6283,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
|
|||
addPrefix,
|
||||
removePrefix,
|
||||
lockDB,
|
||||
UnlockDB::True,
|
||||
onlyApplyMutationLogs,
|
||||
inconsistentSnapshotOnly,
|
||||
beginVersion,
|
||||
|
|
|
@ -2559,19 +2559,19 @@ bool schemaMatch(json_spirit::mValue const& schemaValue,
|
|||
}
|
||||
}
|
||||
|
||||
void setStorageQuota(Transaction& tr, StringRef tenantName, uint64_t quota) {
|
||||
void setStorageQuota(Transaction& tr, StringRef tenantName, int64_t quota) {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
auto key = storageQuotaKey(tenantName);
|
||||
tr.set(key, BinaryWriter::toValue<uint64_t>(quota, Unversioned()));
|
||||
tr.set(key, BinaryWriter::toValue<int64_t>(quota, Unversioned()));
|
||||
}
|
||||
|
||||
ACTOR Future<Optional<uint64_t>> getStorageQuota(Transaction* tr, StringRef tenantName) {
|
||||
ACTOR Future<Optional<int64_t>> getStorageQuota(Transaction* tr, StringRef tenantName) {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
state Optional<Value> v = wait(tr->get(storageQuotaKey(tenantName)));
|
||||
if (!v.present()) {
|
||||
return Optional<uint64_t>();
|
||||
return Optional<int64_t>();
|
||||
}
|
||||
return BinaryReader::fromStringRef<uint64_t>(v.get(), Unversioned());
|
||||
return BinaryReader::fromStringRef<int64_t>(v.get(), Unversioned());
|
||||
}
|
||||
|
||||
std::string ManagementAPI::generateErrorMessage(const CoordinatorsResult& res) {
|
||||
|
|
|
@ -1479,16 +1479,6 @@ Future<RangeResult> HealthMetricsRangeImpl::getRange(ReadYourWritesTransaction*
|
|||
return healthMetricsGetRangeActor(ryw, kr);
|
||||
}
|
||||
|
||||
// Re-expresses `range` relative to `prefix`.
// An empty prefix leaves the range untouched. Otherwise each endpoint that starts with the prefix has the prefix
// stripped, and an endpoint that does not start with it is replaced by the corresponding endpoint of allKeys.
KeyRangeRef toRelativeRange(KeyRangeRef range, KeyRef prefix) {
	if (prefix.empty()) {
		return range;
	}

	KeyRef relativeBegin = allKeys.begin;
	if (range.begin.startsWith(prefix)) {
		relativeBegin = range.begin.removePrefix(prefix);
	}

	KeyRef relativeEnd = allKeys.end;
	if (range.end.startsWith(prefix)) {
		relativeEnd = range.end.removePrefix(prefix);
	}

	return KeyRangeRef(relativeBegin, relativeEnd);
}
|
||||
|
||||
ACTOR Future<UID> getClusterId(Database db) {
|
||||
while (!db->clientInfo->get().clusterId.isValid()) {
|
||||
wait(db->clientInfo->onChange());
|
||||
|
@ -1925,7 +1915,8 @@ Optional<KeyRangeLocationInfo> DatabaseContext::getCachedLocation(const Optional
|
|||
auto range =
|
||||
isBackward ? locationCache.rangeContainingKeyBefore(resolvedKey) : locationCache.rangeContaining(resolvedKey);
|
||||
if (range->value()) {
|
||||
return KeyRangeLocationInfo(tenantEntry, toRelativeRange(range->range(), tenantEntry.prefix), range->value());
|
||||
return KeyRangeLocationInfo(
|
||||
tenantEntry, toPrefixRelativeRange(range->range(), tenantEntry.prefix), range->value());
|
||||
}
|
||||
|
||||
return Optional<KeyRangeLocationInfo>();
|
||||
|
@ -1962,7 +1953,8 @@ bool DatabaseContext::getCachedLocations(const Optional<TenantNameRef>& tenantNa
|
|||
result.clear();
|
||||
return false;
|
||||
}
|
||||
result.emplace_back(tenantEntry, toRelativeRange(r->range() & resolvedRange, tenantEntry.prefix), r->value());
|
||||
result.emplace_back(
|
||||
tenantEntry, toPrefixRelativeRange(r->range() & resolvedRange, tenantEntry.prefix), r->value());
|
||||
if (result.size() == limit || begin == end) {
|
||||
break;
|
||||
}
|
||||
|
@ -2978,7 +2970,7 @@ ACTOR Future<KeyRangeLocationInfo> getKeyLocation_internal(Database cx,
|
|||
|
||||
return KeyRangeLocationInfo(
|
||||
rep.tenantEntry,
|
||||
KeyRange(toRelativeRange(rep.results[0].first, rep.tenantEntry.prefix), rep.arena),
|
||||
KeyRange(toPrefixRelativeRange(rep.results[0].first, rep.tenantEntry.prefix), rep.arena),
|
||||
locationInfo);
|
||||
}
|
||||
}
|
||||
|
@ -3123,7 +3115,7 @@ ACTOR Future<std::vector<KeyRangeLocationInfo>> getKeyRangeLocations_internal(
|
|||
// efficient to save the map pairs and insert them all at once.
|
||||
results.emplace_back(
|
||||
rep.tenantEntry,
|
||||
(toRelativeRange(rep.results[shard].first, rep.tenantEntry.prefix) & keys),
|
||||
(toPrefixRelativeRange(rep.results[shard].first, rep.tenantEntry.prefix) & keys),
|
||||
cx->setCachedLocation(
|
||||
tenant.name, rep.tenantEntry, rep.results[shard].first, rep.results[shard].second));
|
||||
wait(yield());
|
||||
|
@ -6558,7 +6550,7 @@ ACTOR static Future<Void> tryCommit(Reference<TransactionState> trState,
|
|||
e.code() != error_code_grv_proxy_memory_limit_exceeded &&
|
||||
e.code() != error_code_batch_transaction_throttled && e.code() != error_code_tag_throttled &&
|
||||
e.code() != error_code_process_behind && e.code() != error_code_future_version &&
|
||||
e.code() != error_code_tenant_not_found) {
|
||||
e.code() != error_code_tenant_not_found && e.code() != error_code_proxy_tag_throttled) {
|
||||
TraceEvent(SevError, "TryCommitError").error(e);
|
||||
}
|
||||
if (trState->trLogInfo)
|
||||
|
@ -6999,6 +6991,8 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanContext parentSpa
|
|||
&GrvProxyInterface::getConsistentReadVersion,
|
||||
req,
|
||||
cx->taskID))) {
|
||||
CODE_PROBE(v.proxyTagThrottledDuration > 0.0,
|
||||
"getConsistentReadVersion received GetReadVersionReply delayed by proxy tag throttling");
|
||||
if (tags.size() != 0) {
|
||||
auto& priorityThrottledTags = cx->throttledTags[priority];
|
||||
for (auto& tag : tags) {
|
||||
|
@ -7033,7 +7027,7 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanContext parentSpa
|
|||
}
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled &&
|
||||
e.code() != error_code_grv_proxy_memory_limit_exceeded)
|
||||
e.code() != error_code_grv_proxy_memory_limit_exceeded && e.code() != error_code_proxy_tag_throttled)
|
||||
TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
|
||||
if (e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) {
|
||||
wait(delayJittered(5.0));
|
||||
|
@ -7484,7 +7478,7 @@ Future<Void> Transaction::onError(Error const& e) {
|
|||
e.code() == error_code_database_locked || e.code() == error_code_commit_proxy_memory_limit_exceeded ||
|
||||
e.code() == error_code_grv_proxy_memory_limit_exceeded || e.code() == error_code_process_behind ||
|
||||
e.code() == error_code_batch_transaction_throttled || e.code() == error_code_tag_throttled ||
|
||||
e.code() == error_code_blob_granule_request_failed) {
|
||||
e.code() == error_code_blob_granule_request_failed || e.code() == error_code_proxy_tag_throttled) {
|
||||
if (e.code() == error_code_not_committed)
|
||||
++trState->cx->transactionsNotCommitted;
|
||||
else if (e.code() == error_code_commit_unknown_result)
|
||||
|
@ -7724,6 +7718,35 @@ ACTOR Future<Standalone<VectorRef<ReadHotRangeWithMetrics>>> getReadHotRanges(Da
|
|||
}
|
||||
}
|
||||
|
||||
// Sends a waitMetrics request for `keys` to the shard locations that the caller has already resolved.
//   - More than one location: delegated to waitStorageMetricsMultipleLocations().
//   - Otherwise: a single WaitMetricsRequest is load-balanced over locations[0] at DataDistribution priority
//     (so `locations` is indexed at 0 and must not be empty).
// Returns the metrics on success. Returns an empty Optional when the request failed with wrong_shard_server or
// all_alternatives_failed; every other error is traced at SevError and rethrown.
ACTOR Future<Optional<StorageMetrics>> waitStorageMetricsWithLocation(TenantInfo tenantInfo,
                                                                      KeyRange keys,
                                                                      std::vector<KeyRangeLocationInfo> locations,
                                                                      StorageMetrics min,
                                                                      StorageMetrics max,
                                                                      StorageMetrics permittedError) {
	try {
		Future<StorageMetrics> pendingMetrics;
		if (locations.size() <= 1) {
			// Single shard: query its storage servers directly.
			WaitMetricsRequest singleShardReq(tenantInfo, keys, min, max);
			pendingMetrics = loadBalance(locations[0].locations->locations(),
			                             &StorageServerInterface::waitMetrics,
			                             singleShardReq,
			                             TaskPriority::DataDistribution);
		} else {
			pendingMetrics = waitStorageMetricsMultipleLocations(tenantInfo, locations, min, max, permittedError);
		}
		StorageMetrics metrics = wait(pendingMetrics);
		return metrics;
	} catch (Error& e) {
		TraceEvent(SevDebug, "WaitStorageMetricsError").error(e);
		const bool staleLocation =
		    e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed;
		if (!staleLocation) {
			TraceEvent(SevError, "WaitStorageMetricsError").error(e);
			throw;
		}
	}
	// Only reached after one of the two tolerated errors above.
	return Optional<StorageMetrics>();
}
|
||||
|
||||
ACTOR Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(
|
||||
Database cx,
|
||||
KeyRange keys,
|
||||
|
@ -7753,38 +7776,26 @@ ACTOR Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(
|
|||
}
|
||||
|
||||
// SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better
|
||||
// solution to this.
|
||||
if (locations.size() < shardLimit) {
|
||||
try {
|
||||
Future<StorageMetrics> fx;
|
||||
if (locations.size() > 1) {
|
||||
fx = waitStorageMetricsMultipleLocations(tenantInfo, locations, min, max, permittedError);
|
||||
} else {
|
||||
WaitMetricsRequest req(tenantInfo, keys, min, max);
|
||||
fx = loadBalance(locations[0].locations->locations(),
|
||||
&StorageServerInterface::waitMetrics,
|
||||
req,
|
||||
TaskPriority::DataDistribution);
|
||||
}
|
||||
StorageMetrics x = wait(fx);
|
||||
return std::make_pair(x, -1);
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
|
||||
TraceEvent(SevError, "WaitStorageMetricsError").error(e);
|
||||
throw;
|
||||
}
|
||||
cx->invalidateCache(locations[0].tenantEntry.prefix, keys);
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
} else {
|
||||
// solution to this. How could this happen?
|
||||
if (locations.size() >= shardLimit) {
|
||||
TraceEvent(SevWarn, "WaitStorageMetricsPenalty")
|
||||
.detail("Keys", keys)
|
||||
.detail("Limit", CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT)
|
||||
.detail("Limit", shardLimit)
|
||||
.detail("LocationSize", locations.size())
|
||||
.detail("JitteredSecondsOfPenitence", CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY);
|
||||
wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
|
||||
// make sure that the next getKeyRangeLocations() call will actually re-fetch the range
|
||||
cx->invalidateCache(locations[0].tenantEntry.prefix, keys);
|
||||
continue;
|
||||
}
|
||||
|
||||
Optional<StorageMetrics> res =
|
||||
wait(waitStorageMetricsWithLocation(tenantInfo, keys, locations, min, max, permittedError));
|
||||
if (res.present()) {
|
||||
return std::make_pair(res, -1);
|
||||
}
|
||||
cx->invalidateCache(locations[0].tenantEntry.prefix, keys);
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8645,6 +8656,56 @@ Future<Void> DatabaseContext::splitStorageMetricsStream(const PromiseStream<Key>
|
|||
resultStream, Database(Reference<DatabaseContext>::addRef(this)), keys, limit, estimated, minSplitBytes);
|
||||
}
|
||||
|
||||
// Computes split points for `keys` by sending one SplitMetricsRequest per entry of `locations`, in order, and
// carrying the `used` metrics returned by each reply into the next request.
// The result starts with keys.begin, then the split keys reported by the storage servers. The last collected
// point is dropped when the remaining `used` metrics are within limit * STORAGE_METRICS_UNFAIR_SPLIT_LIMIT, and
// keys.end is appended when it does not extend past the end of the last location.
// Returns an empty Optional when a request failed with wrong_shard_server or all_alternatives_failed (the latter
// is also thrown locally when split points come back out of order); any other error is traced at SevError and
// rethrown.
ACTOR Future<Optional<Standalone<VectorRef<KeyRef>>>> splitStorageMetricsWithLocations(
    std::vector<KeyRangeLocationInfo> locations,
    KeyRange keys,
    StorageMetrics limit,
    StorageMetrics estimated,
    Optional<int> minSplitBytes) {
	state StorageMetrics consumed;
	state Standalone<VectorRef<KeyRef>> splitPoints;
	splitPoints.push_back_deep(splitPoints.arena(), keys.begin);
	//TraceEvent("SplitStorageMetrics").detail("Locations", locations.size());
	try {
		state int shardIdx = 0;
		for (; shardIdx < locations.size(); shardIdx++) {
			const bool isLastShard = shardIdx == locations.size() - 1;
			SplitMetricsRequest req(
			    locations[shardIdx].range, limit, consumed, estimated, isLastShard, minSplitBytes);
			SplitMetricsReply reply = wait(loadBalance(locations[shardIdx].locations->locations(),
			                                           &StorageServerInterface::splitMetrics,
			                                           req,
			                                           TaskPriority::DataDistribution));
			if (reply.splits.size()) {
				if (reply.splits[0] <= splitPoints.back()) {
					// Split points are out of order, possibly because of moving data: throw so the
					// caller gets an empty result and can retry.
					ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing
					throw all_alternatives_failed();
				}
				splitPoints.append(splitPoints.arena(), reply.splits.begin(), reply.splits.size());
				// The appended KeyRefs still point into the reply's memory; keep it alive.
				splitPoints.arena().dependsOn(reply.splits.arena());
			}
			consumed = reply.used;

			//TraceEvent("SplitStorageMetricsResult").detail("Used", consumed.bytes).detail("Location", shardIdx).detail("Size", reply.splits.size());
		}

		if (consumed.allLessOrEqual(limit * CLIENT_KNOBS->STORAGE_METRICS_UNFAIR_SPLIT_LIMIT) &&
		    splitPoints.size() > 1) {
			splitPoints.resize(splitPoints.arena(), splitPoints.size() - 1);
		}

		if (keys.end <= locations.back().range.end) {
			splitPoints.push_back_deep(splitPoints.arena(), keys.end);
		}
		return splitPoints;
	} catch (Error& e) {
		const bool staleLocation =
		    e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed;
		if (!staleLocation) {
			TraceEvent(SevError, "SplitStorageMetricsError").error(e);
			throw;
		}
	}
	// Only reached after one of the two tolerated errors above.
	return Optional<Standalone<VectorRef<KeyRef>>>();
}
|
||||
|
||||
ACTOR Future<Standalone<VectorRef<KeyRef>>> splitStorageMetrics(Database cx,
|
||||
KeyRange keys,
|
||||
StorageMetrics limit,
|
||||
|
@ -8663,61 +8724,24 @@ ACTOR Future<Standalone<VectorRef<KeyRef>>> splitStorageMetrics(Database cx,
|
|||
Optional<UID>(),
|
||||
UseProvisionalProxies::False,
|
||||
latestVersion));
|
||||
state StorageMetrics used;
|
||||
state Standalone<VectorRef<KeyRef>> results;
|
||||
|
||||
// SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better
|
||||
// solution to this.
|
||||
if (locations.size() == CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT) {
|
||||
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
|
||||
cx->invalidateCache(Key(), keys);
|
||||
} else {
|
||||
results.push_back_deep(results.arena(), keys.begin);
|
||||
try {
|
||||
//TraceEvent("SplitStorageMetrics").detail("Locations", locations.size());
|
||||
|
||||
state int i = 0;
|
||||
for (; i < locations.size(); i++) {
|
||||
SplitMetricsRequest req(
|
||||
locations[i].range, limit, used, estimated, i == locations.size() - 1, minSplitBytes);
|
||||
SplitMetricsReply res = wait(loadBalance(locations[i].locations->locations(),
|
||||
&StorageServerInterface::splitMetrics,
|
||||
req,
|
||||
TaskPriority::DataDistribution));
|
||||
if (res.splits.size() &&
|
||||
res.splits[0] <= results.back()) { // split points are out of order, possibly because of
|
||||
// moving data, throw error to retry
|
||||
ASSERT_WE_THINK(
|
||||
false); // FIXME: This seems impossible and doesn't seem to be covered by testing
|
||||
throw all_alternatives_failed();
|
||||
}
|
||||
if (res.splits.size()) {
|
||||
results.append(results.arena(), res.splits.begin(), res.splits.size());
|
||||
results.arena().dependsOn(res.splits.arena());
|
||||
}
|
||||
used = res.used;
|
||||
|
||||
//TraceEvent("SplitStorageMetricsResult").detail("Used", used.bytes).detail("Location", i).detail("Size", res.splits.size());
|
||||
}
|
||||
|
||||
if (used.allLessOrEqual(limit * CLIENT_KNOBS->STORAGE_METRICS_UNFAIR_SPLIT_LIMIT) &&
|
||||
results.size() > 1) {
|
||||
results.resize(results.arena(), results.size() - 1);
|
||||
}
|
||||
|
||||
if (keys.end <= locations.back().range.end) {
|
||||
results.push_back_deep(results.arena(), keys.end);
|
||||
}
|
||||
return results;
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
|
||||
TraceEvent(SevError, "SplitStorageMetricsError").error(e);
|
||||
throw;
|
||||
}
|
||||
cx->invalidateCache(Key(), keys);
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
Optional<Standalone<VectorRef<KeyRef>>> results =
|
||||
wait(splitStorageMetricsWithLocations(locations, keys, limit, estimated, minSplitBytes));
|
||||
|
||||
if (results.present()) {
|
||||
return results.get();
|
||||
}
|
||||
|
||||
cx->invalidateCache(Key(), keys);
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10540,6 +10564,76 @@ Reference<DatabaseContext::TransactionT> DatabaseContext::createTransaction() {
|
|||
}
|
||||
|
||||
// BlobGranule API.
|
||||
// Collects blob ranges inside `range` by reading the key-range map rooted at blobRangeKeys.begin.
// Each read asks krmGetRangesUnaligned for up to 2 * batchLimit + 2 boundary entries starting at the
// current cursor; every pair of adjacent boundaries whose first value equals blobRangeActive is
// appended to the result as one KeyRangeRef.
// Returns as soon as the result holds batchLimit ranges, or once a read reports that no more data
// follows. Errors are handed to tr->onError() and the read is retried from the current cursor.
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobRanges(Transaction* tr, KeyRange range, int batchLimit) {
	state Standalone<VectorRef<KeyRangeRef>> activeRanges;
	state Key cursor = range.begin;

	loop {
		try {
			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);

			state RangeResult boundaries = wait(
			    krmGetRangesUnaligned(tr, blobRangeKeys.begin, KeyRangeRef(cursor, range.end), 2 * batchLimit + 2));

			// The returned ranges reference keys owned by the read result, so keep its memory alive.
			activeRanges.arena().dependsOn(boundaries.arena());
			for (int idx = 0; idx < boundaries.size() - 1; ++idx) {
				const bool isActive = (boundaries[idx].value == blobRangeActive);
				if (isActive) {
					activeRanges.push_back(activeRanges.arena(),
					                       KeyRangeRef(boundaries[idx].key, boundaries[idx + 1].key));
				}
				// Checked on every boundary, not only after an append.
				if (activeRanges.size() == batchLimit) {
					return activeRanges;
				}
			}

			if (!boundaries.more) {
				return activeRanges;
			}
			// Resume the next read at the last boundary seen.
			cursor = boundaries.back().key;
		} catch (Error& e) {
			wait(tr->onError(e));
		}
	}
}
|
||||
|
||||
// Lists the blob ranges inside `range`, optionally scoped to a tenant.
// With a tenant name, the tenant entry is fetched through blobGranuleGetTenantEntry (retrying via
// tr->onError()), `range` is prefixed with the tenant prefix before the lookup, and the prefix is
// stripped again from every returned range. Ranges whose begin or end does not start with the tenant
// prefix are logged and left out. Without a tenant name the result of getBlobRanges is returned as is.
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobbifiedRanges(Transaction* tr,
                                                                     KeyRange range,
                                                                     int rangeLimit,
                                                                     Optional<TenantName> tenantName) {
	state TenantMapEntry tenantEntry;

	if (tenantName.present()) {
		loop {
			try {
				wait(store(tenantEntry, blobGranuleGetTenantEntry(tr, range.begin, tenantName)));
				range = range.withPrefix(tenantEntry.prefix);
				break;
			} catch (Error& e) {
				wait(tr->onError(e));
			}
		}
	}

	state Standalone<VectorRef<KeyRangeRef>> prefixedRanges = wait(getBlobRanges(tr, range, rangeLimit));
	if (!tenantName.present()) {
		return prefixedRanges;
	}

	// Strip tenant prefix out.
	state Standalone<VectorRef<KeyRangeRef>> strippedRanges;
	for (auto& candidate : prefixedRanges) {
		const bool insideTenant =
		    candidate.begin.startsWith(tenantEntry.prefix) && candidate.end.startsWith(tenantEntry.prefix);
		if (insideTenant) {
			strippedRanges.push_back_deep(strippedRanges.arena(), candidate.removePrefix(tenantEntry.prefix));
		} else {
			// Filter out blob ranges that span tenants for some reason.
			TraceEvent("ListBlobbifiedRangeSpansTenants")
			    .suppressFor(/*seconds=*/5)
			    .detail("Tenant", tenantName.get())
			    .detail("Range", candidate);
		}
	}
	return strippedRanges;
}
|
||||
|
||||
ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
|
||||
KeyRange range,
|
||||
Version purgeVersion,
|
||||
|
@ -10582,10 +10676,13 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
|
|||
}
|
||||
|
||||
// must be aligned to blob range(s)
|
||||
state Future<Optional<Value>> beginPresent = tr.get(purgeRange.begin.withPrefix(blobRangeKeys.begin));
|
||||
state Future<Optional<Value>> endPresent = tr.get(purgeRange.end.withPrefix(blobRangeKeys.begin));
|
||||
wait(success(beginPresent) && success(endPresent));
|
||||
if (!beginPresent.get().present() || !endPresent.get().present()) {
|
||||
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedBegin =
|
||||
getBlobbifiedRanges(&tr, KeyRangeRef(purgeRange.begin, purgeRange.begin), 2, {});
|
||||
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedEnd =
|
||||
getBlobbifiedRanges(&tr, KeyRangeRef(purgeRange.end, purgeRange.end), 2, {});
|
||||
wait(success(blobbifiedBegin) && success(blobbifiedEnd));
|
||||
if ((!blobbifiedBegin.get().empty() && blobbifiedBegin.get().front().begin < purgeRange.begin) ||
|
||||
(!blobbifiedEnd.get().empty() && blobbifiedEnd.get().back().end > purgeRange.end)) {
|
||||
TraceEvent("UnalignedPurge")
|
||||
.detail("Range", range)
|
||||
.detail("Version", purgeVersion)
|
||||
|
@ -10662,39 +10759,6 @@ Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
|
|||
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
|
||||
}
|
||||
|
||||
// Collects blob ranges inside `range` using a ReadYourWritesTransaction.
// Reads the key-range map rooted at blobRangeKeys.begin in batches of up to 2 * batchLimit + 2
// boundary entries; adjacent boundaries whose first value equals blobRangeActive form one result range.
// Stops when batchLimit ranges were gathered or a read reports no more data; errors go through
// tr->onError() and the read is retried.
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobRanges(Reference<ReadYourWritesTransaction> tr,
                                                               KeyRange range,
                                                               int batchLimit) {
	state Standalone<VectorRef<KeyRangeRef>> collected;
	state Key nextBegin = range.begin;

	loop {
		try {
			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);

			state RangeResult page = wait(
			    krmGetRangesUnaligned(tr, blobRangeKeys.begin, KeyRangeRef(nextBegin, range.end), 2 * batchLimit + 2));

			// Result ranges point into the page's memory; tie the arenas together.
			collected.arena().dependsOn(page.arena());
			for (int pos = 0; pos < page.size() - 1; ++pos) {
				if (page[pos].value == blobRangeActive) {
					KeyRangeRef active(page[pos].key, page[pos + 1].key);
					collected.push_back(collected.arena(), active);
				}
				if (collected.size() == batchLimit) {
					return collected;
				}
			}

			if (!page.more) {
				return collected;
			}
			// Continue from the last boundary of this page.
			nextBegin = page.back().key;
		} catch (Error& e) {
			wait(tr->onError(e));
		}
	}
}
|
||||
|
||||
ACTOR Future<bool> setBlobRangeActor(Reference<DatabaseContext> cx,
|
||||
KeyRange range,
|
||||
bool active,
|
||||
|
@ -10716,7 +10780,7 @@ ACTOR Future<bool> setBlobRangeActor(Reference<DatabaseContext> cx,
|
|||
range = range.withPrefix(tenantEntry.prefix);
|
||||
}
|
||||
|
||||
Standalone<VectorRef<KeyRangeRef>> startBlobRanges = wait(getBlobRanges(tr, range, 1));
|
||||
Standalone<VectorRef<KeyRangeRef>> startBlobRanges = wait(getBlobRanges(&tr->getTransaction(), range, 1));
|
||||
|
||||
if (active) {
|
||||
// Idempotent request.
|
||||
|
@ -10764,47 +10828,19 @@ ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRangesActor(Refer
|
|||
KeyRange range,
|
||||
int rangeLimit,
|
||||
Optional<TenantName> tenantName) {
|
||||
|
||||
state Database db(cx);
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
|
||||
state TenantMapEntry tme;
|
||||
state Transaction tr(db);
|
||||
|
||||
loop {
|
||||
try {
|
||||
if (tenantName.present()) {
|
||||
wait(store(tme, blobGranuleGetTenantEntry(&tr->getTransaction(), range.begin, tenantName)));
|
||||
range = range.withPrefix(tme.prefix);
|
||||
}
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
Standalone<VectorRef<KeyRangeRef>> blobbifiedRanges = wait(getBlobbifiedRanges(&tr, range, rangeLimit, tenantName));
|
||||
|
||||
state Standalone<VectorRef<KeyRangeRef>> blobRanges = wait(getBlobRanges(tr, range, rangeLimit));
|
||||
if (!tenantName.present()) {
|
||||
return blobRanges;
|
||||
}
|
||||
|
||||
// Strip tenant prefix out.
|
||||
state Standalone<VectorRef<KeyRangeRef>> tenantBlobRanges;
|
||||
for (auto& blobRange : blobRanges) {
|
||||
// Filter out blob ranges that span tenants for some reason.
|
||||
if (!blobRange.begin.startsWith(tme.prefix) || !blobRange.end.startsWith(tme.prefix)) {
|
||||
TraceEvent("ListBlobbifiedRangeSpansTenants")
|
||||
.suppressFor(/*seconds=*/5)
|
||||
.detail("Tenant", tenantName.get())
|
||||
.detail("Range", blobRange);
|
||||
continue;
|
||||
}
|
||||
tenantBlobRanges.push_back_deep(tenantBlobRanges.arena(), blobRange.removePrefix(tme.prefix));
|
||||
}
|
||||
return tenantBlobRanges;
|
||||
return blobbifiedRanges;
|
||||
}
|
||||
|
||||
// Starts listBlobbifiedRangesActor for this database and returns its future.
// `rangeLimit` is the value forwarded as the actor's range limit; previously `rowLimit` was passed in
// that position and the statement forwarding `rangeLimit` could never execute.
// NOTE(review): both `rowLimit` and `rangeLimit` appear in the parameter list as shown here, and
// `rowLimit` is not forwarded anywhere - confirm the intended signature against the header.
Future<Standalone<VectorRef<KeyRangeRef>>> DatabaseContext::listBlobbifiedRanges(KeyRange range,
                                                                                 int rowLimit,
                                                                                 int rangeLimit,
                                                                                 Optional<TenantName> tenantName) {
	return listBlobbifiedRangesActor(Reference<DatabaseContext>::addRef(this), range, rangeLimit, tenantName);
}
|
||||
|
||||
int64_t getMaxKeySize(KeyRef const& key) {
|
||||
|
|
|
@ -297,7 +297,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC, isSimulated ? 2 : 21 * 60 * 60 * 24 ); if(randomize && BUGGIFY) DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC = isSimulated ? 0: 120;
|
||||
init( DD_TENANT_AWARENESS_ENABLED, false );
|
||||
init( TENANT_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
init( TENANT_CACHE_STORAGE_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
init( TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
init( TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL, 10 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
|
||||
// TeamRemover
|
||||
init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
|
||||
|
@ -726,8 +727,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( TAG_THROTTLE_EXPIRED_CLEANUP_INTERVAL, 30.0 ); if(randomize && BUGGIFY) TAG_THROTTLE_EXPIRED_CLEANUP_INTERVAL = 1.0;
|
||||
init( AUTO_TAG_THROTTLING_ENABLED, true ); if(randomize && BUGGIFY) AUTO_TAG_THROTTLING_ENABLED = false;
|
||||
init( SS_THROTTLE_TAGS_TRACKED, 1 ); if(randomize && BUGGIFY) SS_THROTTLE_TAGS_TRACKED = deterministicRandom()->randomInt(1, 10);
|
||||
init( GLOBAL_TAG_THROTTLING, false );
|
||||
init( ENFORCE_TAG_THROTTLING_ON_PROXIES, false );
|
||||
init( GLOBAL_TAG_THROTTLING, false ); if(isSimulated) GLOBAL_TAG_THROTTLING = deterministicRandom()->coinflip();
|
||||
init( ENFORCE_TAG_THROTTLING_ON_PROXIES, GLOBAL_TAG_THROTTLING );
|
||||
init( GLOBAL_TAG_THROTTLING_MIN_RATE, 1.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_FOLDING_TIME, 10.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO, 5.0 );
|
||||
|
@ -966,6 +967,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( BG_CONSISTENCY_CHECK_ENABLED, true ); if (randomize && BUGGIFY) BG_CONSISTENCY_CHECK_ENABLED = false;
|
||||
init( BG_CONSISTENCY_CHECK_TARGET_SPEED_KB, 1000 ); if (randomize && BUGGIFY) BG_CONSISTENCY_CHECK_TARGET_SPEED_KB *= (deterministicRandom()->randomInt(2, 50) / 10);
|
||||
init( BG_KEY_TUPLE_TRUNCATE_OFFSET, 0 );
|
||||
init( BG_ENABLE_READ_DRIVEN_COMPACTION, true ); if (randomize && BUGGIFY) BG_ENABLE_READ_DRIVEN_COMPACTION = false;
|
||||
init( BG_RDC_BYTES_FACTOR, 2 ); if (randomize && BUGGIFY) BG_RDC_BYTES_FACTOR = deterministicRandom()->randomInt(1, 10);
|
||||
init( BG_RDC_READ_FACTOR, 3 ); if (randomize && BUGGIFY) BG_RDC_READ_FACTOR = deterministicRandom()->randomInt(1, 10);
|
||||
|
||||
init( BG_ENABLE_MERGING, true ); if (randomize && BUGGIFY) BG_ENABLE_MERGING = false;
|
||||
init( BG_MERGE_CANDIDATE_THRESHOLD_SECONDS, isSimulated ? 20.0 : 30 * 60 ); if (randomize && BUGGIFY) BG_MERGE_CANDIDATE_THRESHOLD_SECONDS = 5.0;
|
||||
|
@ -974,6 +978,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM, 8 ); if( randomize && BUGGIFY ) BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM = 1;
|
||||
init( BLOB_WORKER_RESNAPSHOT_PARALLELISM, 40 ); if( randomize && BUGGIFY ) BLOB_WORKER_RESNAPSHOT_PARALLELISM = deterministicRandom()->randomInt(1, 10);
|
||||
init( BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM, 2000 ); if( randomize && BUGGIFY ) BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM = deterministicRandom()->randomInt(10, 100);
|
||||
init( BLOB_WORKER_RDC_PARALLELISM, 2 ); if( randomize && BUGGIFY ) BLOB_WORKER_RDC_PARALLELISM = deterministicRandom()->randomInt(1, 6);
|
||||
|
||||
init( BLOB_WORKER_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_TIMEOUT = 1.0;
|
||||
init( BLOB_WORKER_REQUEST_TIMEOUT, 5.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_REQUEST_TIMEOUT = 1.0;
|
||||
init( BLOB_WORKERLIST_FETCH_INTERVAL, 1.0 );
|
||||
|
|
|
@ -579,8 +579,8 @@ public:
|
|||
int maxConcurrentTasks) {
|
||||
state Reference<AsyncVar<bool>> paused = makeReference<AsyncVar<bool>>(true);
|
||||
state Future<Void> watchPausedFuture = watchPaused(cx, taskBucket, paused);
|
||||
taskBucket->metricLogger = traceCounters(
|
||||
"TaskBucketMetrics", taskBucket->dbgid, CLIENT_KNOBS->TASKBUCKET_LOGGING_DELAY, &taskBucket->cc);
|
||||
taskBucket->metricLogger = taskBucket->cc.traceCounters(
|
||||
"TaskBucketMetrics", taskBucket->dbgid, CLIENT_KNOBS->TASKBUCKET_LOGGING_DELAY);
|
||||
loop {
|
||||
while (paused->get()) {
|
||||
wait(paused->onChange() || watchPausedFuture);
|
||||
|
|
|
@ -196,6 +196,7 @@ public:
|
|||
Key addPrefix = Key(),
|
||||
Key removePrefix = Key(),
|
||||
LockDB = LockDB::True,
|
||||
UnlockDB = UnlockDB::True,
|
||||
OnlyApplyMutationLogs = OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly = InconsistentSnapshotOnly::False,
|
||||
Version beginVersion = ::invalidVersion,
|
||||
|
|
|
@ -45,6 +45,7 @@ struct BlobWorkerStats {
|
|||
Counter compressionBytesFinal;
|
||||
Counter fullRejections;
|
||||
Counter forceFlushCleanups;
|
||||
Counter readDrivenCompactions;
|
||||
|
||||
int numRangesAssigned;
|
||||
int mutationBytesBuffered;
|
||||
|
@ -83,10 +84,11 @@ struct BlobWorkerStats {
|
|||
readRequestsWithBegin("ReadRequestsWithBegin", cc), readRequestsCollapsed("ReadRequestsCollapsed", cc),
|
||||
flushGranuleReqs("FlushGranuleReqs", cc), compressionBytesRaw("CompressionBytesRaw", cc),
|
||||
compressionBytesFinal("CompressionBytesFinal", cc), fullRejections("FullRejections", cc),
|
||||
forceFlushCleanups("ForceFlushCleanups", cc), numRangesAssigned(0), mutationBytesBuffered(0),
|
||||
activeReadRequests(0), granulesPendingSplitCheck(0), minimumCFVersion(0), cfVersionLag(0),
|
||||
notAtLatestChangeFeeds(0), lastResidentMemory(0), estimatedMaxResidentMemory(0),
|
||||
initialSnapshotLock(initialSnapshotLock), resnapshotLock(resnapshotLock), deltaWritesLock(deltaWritesLock) {
|
||||
forceFlushCleanups("ForceFlushCleanups", cc), readDrivenCompactions("ReadDrivenCompactions", cc),
|
||||
numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0), granulesPendingSplitCheck(0),
|
||||
minimumCFVersion(0), cfVersionLag(0), notAtLatestChangeFeeds(0), lastResidentMemory(0),
|
||||
estimatedMaxResidentMemory(0), initialSnapshotLock(initialSnapshotLock), resnapshotLock(resnapshotLock),
|
||||
deltaWritesLock(deltaWritesLock) {
|
||||
specialCounter(cc, "NumRangesAssigned", [this]() { return this->numRangesAssigned; });
|
||||
specialCounter(cc, "MutationBytesBuffered", [this]() { return this->mutationBytesBuffered; });
|
||||
specialCounter(cc, "ActiveReadRequests", [this]() { return this->activeReadRequests; });
|
||||
|
@ -103,8 +105,8 @@ struct BlobWorkerStats {
|
|||
specialCounter(cc, "DeltaFileWritesActive", [this]() { return this->deltaWritesLock->activePermits(); });
|
||||
specialCounter(cc, "DeltaFileWritesWaiting", [this]() { return this->deltaWritesLock->waiters(); });
|
||||
|
||||
logger = traceCounters("BlobWorkerMetrics", id, interval, &cc, "BlobWorkerMetrics");
|
||||
logger = cc.traceCounters("BlobWorkerMetrics", id, interval, "BlobWorkerMetrics");
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -262,6 +262,8 @@ public:
|
|||
double TAG_THROTTLE_EXPIRATION_INTERVAL;
|
||||
int64_t WRITE_COST_BYTE_FACTOR; // Used to round up the cost of write operations
|
||||
int64_t READ_COST_BYTE_FACTOR; // Used to round up the cost of read operations
|
||||
double PROXY_MAX_TAG_THROTTLE_DURATION; // Maximum duration that a transaction can be tag throttled by proxy before
|
||||
// being rejected
|
||||
|
||||
// busyness reporting
|
||||
double BUSYNESS_SPIKE_START_THRESHOLD;
|
||||
|
|
|
@ -336,12 +336,13 @@ struct KeyRangeRef {
|
|||
bool isCovered(std::vector<KeyRangeRef>& ranges) {
|
||||
ASSERT(std::is_sorted(ranges.begin(), ranges.end(), KeyRangeRef::ArbitraryOrder()));
|
||||
KeyRangeRef clone(begin, end);
|
||||
|
||||
for (auto r : ranges) {
|
||||
if (begin < r.begin)
|
||||
if (clone.begin < r.begin)
|
||||
return false; // uncovered gap between clone.begin and r.begin
|
||||
if (end <= r.end)
|
||||
if (clone.end <= r.end)
|
||||
return true; // range is fully covered
|
||||
if (end > r.begin)
|
||||
if (clone.end > r.begin)
|
||||
// {clone.begin, r.end} is covered. need to check coverage for {r.end, clone.end}
|
||||
clone = KeyRangeRef(r.end, clone.end);
|
||||
}
|
||||
|
@ -589,6 +590,8 @@ inline KeyRange prefixRange(KeyRef prefix) {
|
|||
// The returned reference is valid as long as keys is valid.
|
||||
KeyRef keyBetween(const KeyRangeRef& keys);
|
||||
|
||||
KeyRangeRef toPrefixRelativeRange(KeyRangeRef range, KeyRef prefix);
|
||||
|
||||
struct KeySelectorRef {
|
||||
private:
|
||||
KeyRef key; // Find the last item less than key
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* KeyLocationService.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef FOUNDATIONDB_KEYLOCATIONSERVICE_H
|
||||
#define FOUNDATIONDB_KEYLOCATIONSERVICE_H
|
||||
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
|
||||
class IKeyLocationService {
|
||||
|
||||
// If isBackward == true, returns the shard containing the key before 'key' (an infinitely long, inexpressible key).
|
||||
// Otherwise returns the shard containing key. It's possible the returned location is a failed interface.
|
||||
virtual Future<KeyRangeLocationInfo> getKeyLocation(TenantInfo tenant,
|
||||
Key key,
|
||||
SpanContext spanContext,
|
||||
Optional<UID> debugID,
|
||||
UseProvisionalProxies useProvisionalProxies,
|
||||
Reverse isBackward,
|
||||
Version version) = 0;
|
||||
|
||||
virtual Future<std::vector<KeyRangeLocationInfo>> getKeyRangeLocations(TenantInfo tenant,
|
||||
KeyRange keys,
|
||||
int limit,
|
||||
Reverse reverse,
|
||||
SpanContext spanContext,
|
||||
Optional<UID> debugID,
|
||||
UseProvisionalProxies useProvisionalProxies,
|
||||
Version version) = 0;
|
||||
};
|
||||
|
||||
#endif // FOUNDATIONDB_KEYLOCATIONSERVICE_H
|
|
@ -164,8 +164,8 @@ bool schemaMatch(json_spirit::mValue const& schema,
|
|||
ACTOR Future<Void> mgmtSnapCreate(Database cx, Standalone<StringRef> snapCmd, UID snapUID);
|
||||
|
||||
// Set and get the storage quota per tenant
|
||||
void setStorageQuota(Transaction& tr, StringRef tenantName, uint64_t quota);
|
||||
ACTOR Future<Optional<uint64_t>> getStorageQuota(Transaction* tr, StringRef tenantName);
|
||||
void setStorageQuota(Transaction& tr, StringRef tenantName, int64_t quota);
|
||||
ACTOR Future<Optional<int64_t>> getStorageQuota(Transaction* tr, StringRef tenantName);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
||||
|
|
|
@ -591,6 +591,26 @@ int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess);
|
|||
// Returns the maximum legal size of a key that can be cleared. Keys larger than this will be assumed not to exist.
|
||||
int64_t getMaxClearKeySize(KeyRef const& key);
|
||||
|
||||
struct KeyRangeLocationInfo;
|
||||
// Return the aggregated StorageMetrics of range keys to the caller. The locations tell which interface should
|
||||
// serve the request. The final result is within (min-permittedError/2, max + permittedError/2) if valid.
|
||||
ACTOR Future<Optional<StorageMetrics>> waitStorageMetricsWithLocation(TenantInfo tenantInfo,
|
||||
KeyRange keys,
|
||||
std::vector<KeyRangeLocationInfo> locations,
|
||||
StorageMetrics min,
|
||||
StorageMetrics max,
|
||||
StorageMetrics permittedError);
|
||||
|
||||
// Return the suggested split points from storage server. The locations tell which interface should
|
||||
// serve the request. `limit` is the current estimated storage metrics of `keys`. The returned points, if present,
|
||||
// guarantee that the metrics of each split result are within limit.
|
||||
ACTOR Future<Optional<Standalone<VectorRef<KeyRef>>>> splitStorageMetricsWithLocations(
|
||||
std::vector<KeyRangeLocationInfo> locations,
|
||||
KeyRange keys,
|
||||
StorageMetrics limit,
|
||||
StorageMetrics estimated,
|
||||
Optional<int> minSplitBytes);
|
||||
|
||||
namespace NativeAPI {
|
||||
ACTOR Future<std::vector<std::pair<StorageServerInterface, ProcessClass>>> getServerListAndProcessClasses(
|
||||
Transaction* tr);
|
||||
|
|
|
@ -237,8 +237,10 @@ public:
|
|||
DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC; // Minimal age of a correct-configured server before it's chosen to be wiggled
|
||||
bool DD_TENANT_AWARENESS_ENABLED;
|
||||
int TENANT_CACHE_LIST_REFRESH_INTERVAL; // How often the TenantCache is refreshed
|
||||
int TENANT_CACHE_STORAGE_REFRESH_INTERVAL; // How often the storage bytes used by each tenant in the TenantCache is
|
||||
// refreshed
|
||||
int TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL; // How often the storage bytes used by each tenant is refreshed
|
||||
// in the TenantCache
|
||||
int TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL; // How often the storage quota allocated to each tenant is
|
||||
// refreshed in the TenantCache
|
||||
|
||||
// TeamRemover to remove redundant teams
|
||||
bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor
|
||||
|
@ -948,10 +950,14 @@ public:
|
|||
int BG_MERGE_CANDIDATE_THRESHOLD_SECONDS;
|
||||
int BG_MERGE_CANDIDATE_DELAY_SECONDS;
|
||||
int BG_KEY_TUPLE_TRUNCATE_OFFSET;
|
||||
bool BG_ENABLE_READ_DRIVEN_COMPACTION;
|
||||
int BG_RDC_BYTES_FACTOR;
|
||||
int BG_RDC_READ_FACTOR;
|
||||
|
||||
int BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM;
|
||||
int BLOB_WORKER_RESNAPSHOT_PARALLELISM;
|
||||
int BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM;
|
||||
int BLOB_WORKER_RDC_PARALLELISM;
|
||||
|
||||
double BLOB_WORKER_TIMEOUT; // Blob Manager's reaction time to a blob worker failure
|
||||
double BLOB_WORKER_REQUEST_TIMEOUT; // Blob Worker's server-side request timeout
|
||||
|
|
|
@ -68,6 +68,10 @@ using TenantEntryCachePayloadFunc = std::function<TenantEntryCachePayload<T>(con
|
|||
// 1. Lookup by 'TenantId'
|
||||
// 2. Lookup by 'TenantPrefix'
|
||||
// 3. Lookup by 'TenantName'
|
||||
// TODO: Currently this cache performs poorly if there are tenant accesses happening to unknown tenants, which happens most
|
||||
// frequently in optional tenant mode but can also happen in required mode if there are a lot of tenants created. Further,
|
||||
// as a consequence of the design we cannot be sure that the state of a given tenant is accurate even if it's present in
|
||||
// the cache.
|
||||
|
||||
template <class T>
|
||||
class TenantEntryCache : public ReferenceCounted<TenantEntryCache<T>>, NonCopyable {
|
||||
|
|
|
@ -273,17 +273,4 @@ struct ITracer {
|
|||
virtual void trace(Span const& span) = 0;
|
||||
};
|
||||
|
||||
void openTracer(TracerType type);
|
||||
|
||||
template <class T>
|
||||
struct SpannedDeque : Deque<T> {
|
||||
Span span;
|
||||
explicit SpannedDeque(Location loc) : span(loc) {}
|
||||
SpannedDeque(SpannedDeque&& other) : Deque<T>(std::move(other)), span(std::move(other.span)) {}
|
||||
SpannedDeque(SpannedDeque const&) = delete;
|
||||
SpannedDeque& operator=(SpannedDeque const&) = delete;
|
||||
SpannedDeque& operator=(SpannedDeque&& other) {
|
||||
*static_cast<Deque<T>*>(this) = std::move(other);
|
||||
span = std::move(other.span);
|
||||
}
|
||||
};
|
||||
void openTracer(TracerType type);
|
|
@ -5,9 +5,8 @@ get_target_property(fdbclient_target_includes fdbclient INCLUDE_DIRECTORIES)
|
|||
target_link_libraries(fdbmonitor PUBLIC SimpleOpt)
|
||||
target_include_directories(fdbmonitor PUBLIC "${fdbclient_target_includes}")
|
||||
strip_debug_symbols(fdbmonitor)
|
||||
assert_no_version_h(fdbmonitor)
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(fdbmonitor PRIVATE rt)
|
||||
target_link_libraries(fdbmonitor PRIVATE rt)
|
||||
endif()
|
||||
# FIXME: This include directory is an ugly hack. We probably want to fix this.
|
||||
# as soon as we get rid of the old build system
|
||||
|
@ -17,17 +16,17 @@ target_link_libraries(fdbmonitor PUBLIC Threads::Threads)
|
|||
# appears to change its behavior (it no longer seems to restart killed
|
||||
# processes). fdbmonitor is single-threaded anyway.
|
||||
get_target_property(fdbmonitor_options fdbmonitor COMPILE_OPTIONS)
|
||||
if (NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
if(NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
list(REMOVE_ITEM fdbmonitor_options "-fsanitize=thread")
|
||||
set_property(TARGET fdbmonitor PROPERTY COMPILE_OPTIONS ${fdbmonitor_options})
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
get_target_property(fdbmonitor_options fdbmonitor LINK_OPTIONS)
|
||||
|
||||
if (NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
if(NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
list(REMOVE_ITEM fdbmonitor_options "-fsanitize=thread")
|
||||
set_property(TARGET fdbmonitor PROPERTY LINK_OPTIONS ${fdbmonitor_options})
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if(GENERATE_DEBUG_PACKAGES)
|
||||
fdb_install(TARGETS fdbmonitor DESTINATION fdbmonitor COMPONENT server)
|
||||
|
@ -51,7 +50,7 @@ add_custom_target(clean_sandbox
|
|||
|
||||
add_custom_target(start_sandbox
|
||||
COMMAND ${CMAKE_BINARY_DIR}/bin/fdbmonitor --conffile ${CMAKE_BINARY_DIR}/sandbox/foundationdb.conf
|
||||
--lockfile ${CMAKE_BINARY_DIR}/sandbox/fdbmonitor.lock)
|
||||
--lockfile ${CMAKE_BINARY_DIR}/sandbox/fdbmonitor.lock)
|
||||
|
||||
add_dependencies(start_sandbox fdbmonitor fdbserver)
|
||||
|
||||
|
@ -61,6 +60,6 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh)
|
|||
endif()
|
||||
|
||||
add_custom_target(generate_profile
|
||||
COMMAND ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh ${CMAKE_BINARY_DIR})
|
||||
COMMAND ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh ${CMAKE_BINARY_DIR})
|
||||
|
||||
add_dependencies(generate_profile fdbmonitor fdbserver mako fdbcli)
|
||||
|
|
|
@ -24,8 +24,8 @@
|
|||
// Creates a counter called `name` and registers it with `collection`.
// The metric is initialised under "<collection name>.<Name>", where <Name> is `name` with its first
// character upper-cased, together with the collection's id. `name.at(0)` throws std::out_of_range
// for an empty name.
// The collection is reached only through getName() / getId() / addCounter(): its data members are
// private, and initialising and registering exactly once keeps the counter from appearing twice in
// the collection.
Counter::Counter(std::string const& name, CounterCollection& collection)
  : name(name), interval_start(0), last_event(0), interval_sq_time(0), roughness_interval_start(0), interval_delta(0),
    interval_start_value(0) {
	metric.init(collection.getName() + "." + (char)toupper(name.at(0)) + name.substr(1), collection.getId());
	collection.addCounter(this);
}
|
||||
|
||||
void Counter::operator+=(Value delta) {
|
||||
|
@ -88,36 +88,48 @@ void CounterCollection::logToTraceEvent(TraceEvent& te) const {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> traceCounters(std::string traceEventName,
|
||||
UID traceEventID,
|
||||
double interval,
|
||||
CounterCollection* counters,
|
||||
std::string trackLatestName,
|
||||
std::function<void(TraceEvent&)> decorator) {
|
||||
wait(delay(0)); // Give an opportunity for all members used in special counters to be initialized
|
||||
class CounterCollectionImpl {
|
||||
public:
|
||||
ACTOR static Future<Void> traceCounters(CounterCollection* counters,
|
||||
std::string traceEventName,
|
||||
UID traceEventID,
|
||||
double interval,
|
||||
std::string trackLatestName,
|
||||
std::function<void(TraceEvent&)> decorator) {
|
||||
wait(delay(0)); // Give an opportunity for all members used in special counters to be initialized
|
||||
|
||||
for (ICounter* c : counters->counters)
|
||||
c->resetInterval();
|
||||
|
||||
state Reference<EventCacheHolder> traceEventHolder;
|
||||
if (!trackLatestName.empty()) {
|
||||
traceEventHolder = makeReference<EventCacheHolder>(trackLatestName);
|
||||
}
|
||||
|
||||
state double last_interval = now();
|
||||
|
||||
loop {
|
||||
TraceEvent te(traceEventName.c_str(), traceEventID);
|
||||
te.detail("Elapsed", now() - last_interval);
|
||||
|
||||
counters->logToTraceEvent(te);
|
||||
decorator(te);
|
||||
for (ICounter* c : counters->counters)
|
||||
c->resetInterval();
|
||||
|
||||
state Reference<EventCacheHolder> traceEventHolder;
|
||||
if (!trackLatestName.empty()) {
|
||||
te.trackLatest(traceEventHolder->trackingKey);
|
||||
traceEventHolder = makeReference<EventCacheHolder>(trackLatestName);
|
||||
}
|
||||
|
||||
last_interval = now();
|
||||
wait(delay(interval, TaskPriority::FlushTrace));
|
||||
state double last_interval = now();
|
||||
|
||||
loop {
|
||||
TraceEvent te(traceEventName.c_str(), traceEventID);
|
||||
te.detail("Elapsed", now() - last_interval);
|
||||
|
||||
counters->logToTraceEvent(te);
|
||||
decorator(te);
|
||||
|
||||
if (!trackLatestName.empty()) {
|
||||
te.trackLatest(traceEventHolder->trackingKey);
|
||||
}
|
||||
|
||||
last_interval = now();
|
||||
wait(delay(interval, TaskPriority::FlushTrace));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Forwards to CounterCollectionImpl::traceCounters with this collection as the target and returns
// the resulting future.
Future<Void> CounterCollection::traceCounters(std::string const& traceEventName,
                                              UID traceEventID,
                                              double interval,
                                              std::string const& trackLatestName,
                                              std::function<void(TraceEvent&)> const& decorator) {
	Future<Void> tracer =
	    CounterCollectionImpl::traceCounters(this, traceEventName, traceEventID, interval, trackLatestName, decorator);
	return tracer;
}
|
||||
|
|
|
@ -67,17 +67,37 @@ struct Traceable<ICounter*> : std::true_type {
|
|||
}
|
||||
};
|
||||
|
||||
struct CounterCollection {
|
||||
CounterCollection(std::string name, std::string id = std::string()) : name(name), id(id) {}
|
||||
std::vector<struct ICounter*> counters, counters_to_remove;
|
||||
~CounterCollection() {
|
||||
for (auto c : counters_to_remove)
|
||||
c->remove();
|
||||
}
|
||||
class CounterCollection {
|
||||
friend class CounterCollectionImpl;
|
||||
|
||||
std::string name;
|
||||
std::string id;
|
||||
std::vector<struct ICounter*> counters, countersToRemove;
|
||||
|
||||
public:
|
||||
CounterCollection(std::string const& name, std::string const& id = std::string()) : name(name), id(id) {}
|
||||
~CounterCollection() {
|
||||
for (auto c : countersToRemove)
|
||||
c->remove();
|
||||
}
|
||||
|
||||
void addCounter(ICounter* counter) { counters.push_back(counter); }
|
||||
|
||||
// Call remove method on this counter in ~CounterCollection
|
||||
void markForRemoval(ICounter* counter) { countersToRemove.push_back(counter); }
|
||||
|
||||
std::string const& getName() const { return name; }
|
||||
|
||||
std::string const& getId() const { return id; }
|
||||
|
||||
void logToTraceEvent(TraceEvent& te) const;
|
||||
|
||||
Future<Void> traceCounters(
|
||||
std::string const& traceEventName,
|
||||
UID traceEventID,
|
||||
double interval,
|
||||
std::string const& trackLatestName = std::string(),
|
||||
std::function<void(TraceEvent&)> const& decorator = [](auto& te) {});
|
||||
};
|
||||
|
||||
struct Counter final : ICounter, NonCopyable {
|
||||
|
@ -131,8 +151,8 @@ struct Traceable<Counter> : std::true_type {
|
|||
template <class F>
|
||||
struct SpecialCounter final : ICounter, FastAllocated<SpecialCounter<F>>, NonCopyable {
|
||||
SpecialCounter(CounterCollection& collection, std::string const& name, F&& f) : name(name), f(f) {
|
||||
collection.counters.push_back(this);
|
||||
collection.counters_to_remove.push_back(this);
|
||||
collection.addCounter(this);
|
||||
collection.markForRemoval(this);
|
||||
}
|
||||
void remove() override { delete this; }
|
||||
|
||||
|
@ -162,14 +182,6 @@ static void specialCounter(CounterCollection& collection, std::string const& nam
|
|||
new SpecialCounter<F>(collection, name, std::move(f));
|
||||
}
|
||||
|
||||
Future<Void> traceCounters(
|
||||
std::string const& traceEventName,
|
||||
UID const& traceEventID,
|
||||
double const& interval,
|
||||
CounterCollection* const& counters,
|
||||
std::string const& trackLatestName = std::string(),
|
||||
std::function<void(TraceEvent&)> const& decorator = [](TraceEvent& te) {});
|
||||
|
||||
class LatencyBands {
|
||||
public:
|
||||
LatencyBands(std::string name, UID id, double loggingInterval)
|
||||
|
@ -180,7 +192,7 @@ public:
|
|||
if (bands.size() == 0) {
|
||||
ASSERT(!cc && !filteredCount);
|
||||
cc = std::make_unique<CounterCollection>(name, id.toString());
|
||||
logger = traceCounters(name, id, loggingInterval, cc.get(), id.toString() + "/" + name);
|
||||
logger = cc->traceCounters(name, id, loggingInterval, id.toString() + "/" + name);
|
||||
filteredCount = std::make_unique<Counter>("Filtered", *cc);
|
||||
insertBand(std::numeric_limits<double>::infinity());
|
||||
}
|
||||
|
|
|
@ -42,8 +42,6 @@ struct TenantInfo {
|
|||
// Is set during deserialization. It will be set to true if the tenant
|
||||
// name is set and the client is authorized to use this tenant.
|
||||
bool tenantAuthorized = false;
|
||||
// Number of storage bytes currently used by this tenant.
|
||||
int64_t storageUsage = 0;
|
||||
|
||||
// Helper function for most endpoints that read/write data. This returns true iff
|
||||
// the client is either a) a trusted peer or b) is accessing keyspace belonging to a tenant,
|
||||
|
|
|
@ -290,8 +290,8 @@ struct BackupData {
|
|||
specialCounter(cc, "MsgQ", [this]() { return this->messages.size(); });
|
||||
specialCounter(cc, "BufferedBytes", [this]() { return this->lock->activePermits(); });
|
||||
specialCounter(cc, "AvailableBytes", [this]() { return this->lock->available(); });
|
||||
logger = traceCounters(
|
||||
"BackupWorkerMetrics", myId, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "BackupWorkerMetrics");
|
||||
logger =
|
||||
cc.traceCounters("BackupWorkerMetrics", myId, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "BackupWorkerMetrics");
|
||||
}
|
||||
|
||||
bool pullFinished() const { return endVersion.present() && pulledVersion.get() > endVersion.get(); }
|
||||
|
|
|
@ -296,7 +296,7 @@ struct BlobManagerStats {
|
|||
specialCounter(cc, "HardBoundaries", [mergeHardBoundaries]() { return mergeHardBoundaries->size(); });
|
||||
specialCounter(cc, "SoftBoundaries", [mergeBoundaries]() { return mergeBoundaries->size(); });
|
||||
specialCounter(cc, "BlockedAssignments", [this]() { return this->blockedAssignments; });
|
||||
logger = traceCounters("BlobManagerMetrics", id, interval, &cc, "BlobManagerMetrics");
|
||||
logger = cc.traceCounters("BlobManagerMetrics", id, interval, "BlobManagerMetrics");
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -3537,7 +3537,7 @@ ACTOR Future<Void> recoverBlobManager(Reference<BlobManagerData> bmData) {
|
|||
}
|
||||
|
||||
// skip the rest of the algorithm for the first blob manager
|
||||
if (bmData->epoch == 1) {
|
||||
if (bmData->epoch == 1 && !isFullRestoreMode()) {
|
||||
bmData->doneRecovering.send(Void());
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "fdbclient/BlobGranuleCommon.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/FastRef.h"
|
||||
#include "flow/Trace.h"
|
||||
#include "flow/flow.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/BlobConnectionProvider.h"
|
||||
|
@ -189,23 +190,6 @@ private:
|
|||
static const int sMaxCount_{ 5 }; // max number of manifest file to keep
|
||||
};
|
||||
|
||||
// Defines granule info that interests full restore
|
||||
struct BlobGranuleVersion {
|
||||
// Two constructors required by VectorRef
|
||||
BlobGranuleVersion() {}
|
||||
BlobGranuleVersion(Arena& a, const BlobGranuleVersion& copyFrom)
|
||||
: granuleID(copyFrom.granuleID), keyRange(a, copyFrom.keyRange), version(copyFrom.version),
|
||||
sizeInBytes(copyFrom.sizeInBytes) {}
|
||||
|
||||
UID granuleID;
|
||||
KeyRangeRef keyRange;
|
||||
Version version;
|
||||
int64_t sizeInBytes;
|
||||
};
|
||||
|
||||
// Defines a vector for BlobGranuleVersion
|
||||
typedef Standalone<VectorRef<BlobGranuleVersion>> BlobGranuleVersionVector;
|
||||
|
||||
// Defines filename, version, size for each granule file that interests full restore
|
||||
struct GranuleFileVersion {
|
||||
Version version;
|
||||
|
@ -226,16 +210,53 @@ public:
|
|||
Value data = wait(readFromFile(self));
|
||||
Standalone<BlobManifest> manifest = decode(data);
|
||||
wait(writeSystemKeys(self, manifest.rows));
|
||||
BlobGranuleVersionVector _ = wait(listGranules(self));
|
||||
BlobGranuleRestoreVersionVector _ = wait(listGranules(self));
|
||||
} catch (Error& e) {
|
||||
dprint("WARNING: unexpected manifest loader error {}\n", e.what()); // skip error handling so far
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Iterate active granules and return their version/sizes
|
||||
ACTOR static Future<BlobGranuleRestoreVersionVector> listGranules(Reference<BlobManifestLoader> self) {
|
||||
state Transaction tr(self->db_);
|
||||
loop {
|
||||
state BlobGranuleRestoreVersionVector results;
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
|
||||
try {
|
||||
std::vector<KeyRangeRef> granules;
|
||||
state int i = 0;
|
||||
auto limit = GetRangeLimits::BYTE_LIMIT_UNLIMITED;
|
||||
state RangeResult blobRanges = wait(tr.getRange(blobGranuleMappingKeys, limit));
|
||||
for (i = 0; i < blobRanges.size() - 1; i++) {
|
||||
Key startKey = blobRanges[i].key.removePrefix(blobGranuleMappingKeys.begin);
|
||||
Key endKey = blobRanges[i + 1].key.removePrefix(blobGranuleMappingKeys.begin);
|
||||
state KeyRange granuleRange = KeyRangeRef(startKey, endKey);
|
||||
try {
|
||||
Standalone<BlobGranuleRestoreVersion> granule = wait(getGranule(&tr, granuleRange));
|
||||
results.push_back_deep(results.arena(), granule);
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_restore_missing_data) {
|
||||
dprint("missing data for key range {} \n", granuleRange.toString());
|
||||
TraceEvent("BlobRestoreMissingData").detail("KeyRange", granuleRange.toString());
|
||||
} else {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print out a summary for blob granules
|
||||
ACTOR static Future<Void> print(Reference<BlobManifestLoader> self) {
|
||||
state BlobGranuleVersionVector granules = wait(listGranules(self));
|
||||
state BlobGranuleRestoreVersionVector granules = wait(listGranules(self));
|
||||
for (auto granule : granules) {
|
||||
wait(checkGranuleFiles(self, granule));
|
||||
}
|
||||
|
@ -285,41 +306,9 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
// Iterate active granules and return their version/sizes
|
||||
ACTOR static Future<BlobGranuleVersionVector> listGranules(Reference<BlobManifestLoader> self) {
|
||||
state Transaction tr(self->db_);
|
||||
loop {
|
||||
state BlobGranuleVersionVector results;
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
|
||||
try {
|
||||
std::vector<KeyRangeRef> granules;
|
||||
state int i = 0;
|
||||
auto limit = GetRangeLimits::BYTE_LIMIT_UNLIMITED;
|
||||
state RangeResult blobRanges = wait(tr.getRange(blobGranuleMappingKeys, limit));
|
||||
for (i = 0; i < blobRanges.size() - 1; i++) {
|
||||
Key startKey = blobRanges[i].key.removePrefix(blobGranuleMappingKeys.begin);
|
||||
Key endKey = blobRanges[i + 1].key.removePrefix(blobGranuleMappingKeys.begin);
|
||||
state KeyRange granuleRange = KeyRangeRef(startKey, endKey);
|
||||
try {
|
||||
Standalone<BlobGranuleVersion> granule = wait(getGranule(&tr, granuleRange));
|
||||
results.push_back_deep(results.arena(), granule);
|
||||
} catch (Error& e) {
|
||||
dprint("missing data for key range {} \n", granuleRange.toString());
|
||||
}
|
||||
}
|
||||
return results;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the newest granule for a key range. The newest granule has the max version and relevant files
|
||||
ACTOR static Future<Standalone<BlobGranuleVersion>> getGranule(Transaction* tr, KeyRangeRef range) {
|
||||
state Standalone<BlobGranuleVersion> granuleVersion;
|
||||
ACTOR static Future<Standalone<BlobGranuleRestoreVersion>> getGranule(Transaction* tr, KeyRangeRef range) {
|
||||
state Standalone<BlobGranuleRestoreVersion> granuleVersion;
|
||||
KeyRange historyKeyRange = blobGranuleHistoryKeyRangeFor(range);
|
||||
// reverse lookup so that the first row is the newest version
|
||||
state RangeResult results =
|
||||
|
@ -389,7 +378,7 @@ private:
|
|||
}
|
||||
|
||||
// Read data from granules and print out summary
|
||||
ACTOR static Future<Void> checkGranuleFiles(Reference<BlobManifestLoader> self, BlobGranuleVersion granule) {
|
||||
ACTOR static Future<Void> checkGranuleFiles(Reference<BlobManifestLoader> self, BlobGranuleRestoreVersion granule) {
|
||||
state KeyRangeRef range = granule.keyRange;
|
||||
state Version readVersion = granule.version;
|
||||
state Transaction tr(self->db_);
|
||||
|
@ -441,3 +430,11 @@ ACTOR Future<Void> printRestoreSummary(Database db, Reference<BlobConnectionProv
|
|||
wait(BlobManifestLoader::print(loader));
|
||||
return Void();
|
||||
}
|
||||
|
||||
// API to list blob granules
|
||||
ACTOR Future<BlobGranuleRestoreVersionVector> listBlobGranules(Database db,
|
||||
Reference<BlobConnectionProvider> blobConn) {
|
||||
Reference<BlobManifestLoader> loader = makeReference<BlobManifestLoader>(db, blobConn);
|
||||
BlobGranuleRestoreVersionVector result = wait(BlobManifestLoader::listGranules(loader));
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -30,54 +30,312 @@
|
|||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbserver/ServerDBInfo.actor.h"
|
||||
#include "fdbserver/WaitFailure.h"
|
||||
|
||||
#include "fdbserver/MoveKeys.actor.h"
|
||||
#include "fdbserver/BlobGranuleServerCommon.actor.h"
|
||||
#include "flow/actorcompiler.h" // has to be last include
|
||||
#include "flow/network.h"
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#define ENABLE_DEBUG_MG true
|
||||
|
||||
template <typename... T>
|
||||
static inline void dprint(fmt::format_string<T...> fmt, T&&... args) {
|
||||
if (ENABLE_DEBUG_MG)
|
||||
fmt::print(fmt, std::forward<T>(args)...);
|
||||
}
|
||||
|
||||
// BlobMigrator manages data migration from blob storage to storage server. It implements a minimal set of
|
||||
// StorageServerInterface APIs which are needed for DataDistributor to start data migration.
|
||||
class BlobMigrator : public NonCopyable, public ReferenceCounted<BlobMigrator> {
|
||||
public:
|
||||
BlobMigrator(Reference<AsyncVar<ServerDBInfo> const> dbInfo, BlobMigratorInterface interf)
|
||||
: blobMigratorInterf(interf), actors(false) {
|
||||
if (!blobConn.isValid() && SERVER_KNOBS->BG_METADATA_SOURCE != "tenant") {
|
||||
blobConn = BlobConnectionProvider::newBlobConnectionProvider(SERVER_KNOBS->BG_URL);
|
||||
: interf_(interf), actors_(false) {
|
||||
if (!blobConn_.isValid() && SERVER_KNOBS->BG_METADATA_SOURCE != "tenant") {
|
||||
blobConn_ = BlobConnectionProvider::newBlobConnectionProvider(SERVER_KNOBS->BG_URL);
|
||||
}
|
||||
db = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
|
||||
db_ = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
|
||||
}
|
||||
~BlobMigrator() {}
|
||||
|
||||
// Start migration
|
||||
ACTOR static Future<Void> start(Reference<BlobMigrator> self) {
|
||||
self->actors.add(waitFailureServer(self->blobMigratorInterf.waitFailure.getFuture()));
|
||||
if (!isFullRestoreMode()) {
|
||||
return Void();
|
||||
}
|
||||
wait(delay(10)); // TODO need to wait for a signal for readiness of blob manager
|
||||
|
||||
BlobGranuleRestoreVersionVector granules = wait(listBlobGranules(self->db_, self->blobConn_));
|
||||
self->blobGranules_ = granules;
|
||||
|
||||
wait(prepare(self, normalKeys));
|
||||
|
||||
wait(serverLoop(self));
|
||||
return Void();
|
||||
}
|
||||
|
||||
private:
|
||||
// Prepare for data migration for given key range.
|
||||
ACTOR static Future<Void> prepare(Reference<BlobMigrator> self, KeyRangeRef keys) {
|
||||
// Register as a storage server, so that DataDistributor can start data movement afterwards
|
||||
std::pair<Version, Tag> verAndTag = wait(addStorageServer(self->db_, self->interf_.ssi));
|
||||
dprint("Started storage server interface {} {}\n", verAndTag.first, verAndTag.second.toString());
|
||||
|
||||
// Reassign key ranges to the storage server
|
||||
// It'll restart DataDistributor so that internal data structures like ShardTracker, ShardsAffectedByTeamFailure
|
||||
// could be re-initialized. Ideally it should be done within DataDistributor, then we don't need to
|
||||
// restart DataDistributor
|
||||
state int oldMode = wait(setDDMode(self->db_, 0));
|
||||
wait(unassignServerKeys(self, keys));
|
||||
wait(assignKeysToServer(self, keys, self->interf_.ssi.id()));
|
||||
wait(success(setDDMode(self->db_, oldMode)));
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Assign given key range to specified storage server.
|
||||
ACTOR static Future<Void> assignKeysToServer(Reference<BlobMigrator> self, KeyRangeRef keys, UID serverUID) {
|
||||
state Transaction tr(self->db_);
|
||||
loop {
|
||||
choose {
|
||||
when(HaltBlobMigratorRequest req = waitNext(self->blobMigratorInterf.haltBlobMigrator.getFuture())) {
|
||||
req.reply.send(Void());
|
||||
TraceEvent("BlobMigratorHalted", self->blobMigratorInterf.id()).detail("ReqID", req.requesterID);
|
||||
break;
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
try {
|
||||
state Value value = keyServersValue(std::vector<UID>({ serverUID }), std::vector<UID>(), UID(), UID());
|
||||
wait(krmSetRange(&tr, keyServersPrefix, keys, value));
|
||||
wait(krmSetRange(&tr, serverKeysPrefixFor(serverUID), keys, serverKeysTrue));
|
||||
wait(tr.commit());
|
||||
dprint("Assign {} to server {}\n", normalKeys.toString(), serverUID.toString());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unassign given key range from its current storage servers
|
||||
ACTOR static Future<Void> unassignServerKeys(Reference<BlobMigrator> self, KeyRangeRef keys) {
|
||||
state Transaction tr(self->db_);
|
||||
loop {
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
try {
|
||||
state RangeResult serverList = wait(tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!serverList.more && serverList.size() < CLIENT_KNOBS->TOO_MANY);
|
||||
for (auto& server : serverList) {
|
||||
state UID id = decodeServerListValue(server.value).id();
|
||||
RangeResult ranges = wait(krmGetRanges(&tr, serverKeysPrefixFor(id), keys));
|
||||
bool owning = false;
|
||||
for (auto& r : ranges) {
|
||||
if (r.value == serverKeysTrue) {
|
||||
owning = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (owning) {
|
||||
dprint("Unassign {} from storage server {}\n", keys.toString(), id.toString());
|
||||
wait(krmSetRange(&tr, serverKeysPrefixFor(id), keys, serverKeysFalse));
|
||||
}
|
||||
}
|
||||
when(wait(self->actors.getResult())) {}
|
||||
wait(tr.commit());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Main server loop
|
||||
ACTOR static Future<Void> serverLoop(Reference<BlobMigrator> self) {
|
||||
self->actors_.add(waitFailureServer(self->interf_.ssi.waitFailure.getFuture()));
|
||||
self->actors_.add(handleRequest(self));
|
||||
self->actors_.add(handleUnsupportedRequest(self));
|
||||
loop {
|
||||
try {
|
||||
choose {
|
||||
when(HaltBlobMigratorRequest req = waitNext(self->interf_.haltBlobMigrator.getFuture())) {
|
||||
req.reply.send(Void());
|
||||
TraceEvent("BlobMigratorHalted", self->interf_.id()).detail("ReqID", req.requesterID);
|
||||
break;
|
||||
}
|
||||
when(wait(self->actors_.getResult())) {}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
dprint("Unexpected serverLoop error {}\n", e.what());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Handle StorageServerInterface APIs
|
||||
ACTOR static Future<Void> handleRequest(Reference<BlobMigrator> self) {
|
||||
state StorageServerInterface ssi = self->interf_.ssi;
|
||||
loop {
|
||||
try {
|
||||
choose {
|
||||
when(GetShardStateRequest req = waitNext(ssi.getShardState.getFuture())) {
|
||||
dprint("Handle GetShardStateRequest\n");
|
||||
Version version = maxVersion(self);
|
||||
GetShardStateReply rep(version, version);
|
||||
req.reply.send(rep); // return empty shards
|
||||
}
|
||||
when(WaitMetricsRequest req = waitNext(ssi.waitMetrics.getFuture())) {
|
||||
// dprint("Handle WaitMetricsRequest\n");
|
||||
self->actors_.add(processWaitMetricsRequest(self, req));
|
||||
}
|
||||
when(SplitMetricsRequest req = waitNext(ssi.splitMetrics.getFuture())) {
|
||||
dprint("Handle SplitMetrics {}\n", req.keys.toString());
|
||||
SplitMetricsReply rep;
|
||||
for (auto granule : self->blobGranules_) {
|
||||
// TODO: Use granule boundary as split point. A better approach is to split by size
|
||||
if (granule.keyRange.begin > req.keys.begin && granule.keyRange.end < req.keys.end)
|
||||
rep.splits.push_back_deep(rep.splits.arena(), granule.keyRange.begin);
|
||||
}
|
||||
req.reply.send(rep);
|
||||
}
|
||||
when(GetStorageMetricsRequest req = waitNext(ssi.getStorageMetrics.getFuture())) {
|
||||
fmt::print("Handle GetStorageMetrics\n");
|
||||
StorageMetrics metrics;
|
||||
metrics.bytes = sizeInBytes(self);
|
||||
GetStorageMetricsReply resp;
|
||||
resp.load = metrics;
|
||||
req.reply.send(resp);
|
||||
}
|
||||
when(ReplyPromise<KeyValueStoreType> reply = waitNext(ssi.getKeyValueStoreType.getFuture())) {
|
||||
dprint("Handle KeyValueStoreType\n");
|
||||
reply.send(KeyValueStoreType::MEMORY);
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
dprint("Unexpected blob migrator request error {}\n", e.what());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle StorageServerInterface APIs that are not supported. Simply log and return error
|
||||
ACTOR static Future<Void> handleUnsupportedRequest(Reference<BlobMigrator> self) {
|
||||
state StorageServerInterface ssi = self->interf_.ssi;
|
||||
loop {
|
||||
try {
|
||||
choose {
|
||||
when(SplitRangeRequest req = waitNext(ssi.getRangeSplitPoints.getFuture())) {
|
||||
dprint("Unsupported SplitRangeRequest\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(StorageQueuingMetricsRequest req = waitNext(ssi.getQueuingMetrics.getFuture())) {
|
||||
self->actors_.add(processStorageQueuingMetricsRequest(req));
|
||||
}
|
||||
when(ReadHotSubRangeRequest req = waitNext(ssi.getReadHotRanges.getFuture())) {
|
||||
dprint("Unsupported ReadHotSubRange\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(GetKeyValuesStreamRequest req = waitNext(ssi.getKeyValuesStream.getFuture())) {
|
||||
dprint("Unsupported GetKeyValuesStreamRequest\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(GetKeyRequest req = waitNext(ssi.getKey.getFuture())) {
|
||||
dprint("Unsupported GetKeyRequest\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(GetKeyValuesRequest req = waitNext(ssi.getKeyValues.getFuture())) {
|
||||
/* dprint("Unsupported GetKeyValuesRequest {} - {} @ {}\n",
|
||||
req.begin.getKey().printable(),
|
||||
req.end.getKey().printable(),
|
||||
req.version); */
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(GetValueRequest req = waitNext(ssi.getValue.getFuture())) {
|
||||
dprint("Unsupported GetValueRequest\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(GetCheckpointRequest req = waitNext(ssi.checkpoint.getFuture())) {
|
||||
dprint("Unsupported GetCheckpoint \n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(FetchCheckpointRequest req = waitNext(ssi.fetchCheckpoint.getFuture())) {
|
||||
dprint("Unsupported FetchCheckpointRequest\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(UpdateCommitCostRequest req = waitNext(ssi.updateCommitCostRequest.getFuture())) {
|
||||
dprint("Unsupported UpdateCommitCostRequest\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
when(FetchCheckpointKeyValuesRequest req = waitNext(ssi.fetchCheckpointKeyValues.getFuture())) {
|
||||
dprint("Unsupported FetchCheckpointKeyValuesRequest\n");
|
||||
req.reply.sendError(unsupported_operation());
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
dprint("Unexpected request handling error {}\n", e.what());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> processWaitMetricsRequest(Reference<BlobMigrator> self, WaitMetricsRequest req) {
|
||||
state WaitMetricsRequest waitMetricsRequest = req;
|
||||
// FIXME get rid of this delay. It's a temporary solution to avoid starvation in scheduling of DD
|
||||
// processes
|
||||
wait(delay(1));
|
||||
StorageMetrics metrics;
|
||||
metrics.bytes = sizeInBytes(self, waitMetricsRequest.keys);
|
||||
waitMetricsRequest.reply.send(metrics);
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> processStorageQueuingMetricsRequest(StorageQueuingMetricsRequest req) {
|
||||
dprint("Unsupported StorageQueuingMetricsRequest\n");
|
||||
// FIXME get rid of this delay. It's a temporary solution to avoid starvation in scheduling of DD
|
||||
// processes
|
||||
wait(delay(1));
|
||||
req.reply.sendError(unsupported_operation());
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Return total storage size in bytes for migration
|
||||
static int64_t sizeInBytes(Reference<BlobMigrator> self) { return sizeInBytes(self, normalKeys); }
|
||||
|
||||
// Return storage size in bytes for given key range
|
||||
static int64_t sizeInBytes(Reference<BlobMigrator> self, KeyRangeRef range) {
|
||||
int64_t bytes = 0;
|
||||
for (auto granule : self->blobGranules_) {
|
||||
if (range.intersects(granule.keyRange))
|
||||
bytes += granule.sizeInBytes;
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// Return max version for all blob granules
|
||||
static Version maxVersion(Reference<BlobMigrator> self) {
|
||||
Version max = 0;
|
||||
for (auto granule : self->blobGranules_) {
|
||||
max = std::max(granule.version, max);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
private:
|
||||
Database db;
|
||||
Reference<BlobConnectionProvider> blobConn;
|
||||
BlobMigratorInterface blobMigratorInterf;
|
||||
ActorCollection actors;
|
||||
Database db_;
|
||||
Reference<BlobConnectionProvider> blobConn_;
|
||||
BlobGranuleRestoreVersionVector blobGranules_;
|
||||
BlobMigratorInterface interf_;
|
||||
ActorCollection actors_;
|
||||
};
|
||||
|
||||
// Main entry point
|
||||
ACTOR Future<Void> blobMigrator(BlobMigratorInterface ssi, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
|
||||
fmt::print("Start blob migrator {} \n", ssi.id().toString());
|
||||
ACTOR Future<Void> blobMigrator(BlobMigratorInterface interf, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
|
||||
fmt::print("Start blob migrator {} \n", interf.id().toString());
|
||||
try {
|
||||
Reference<BlobMigrator> self = makeReference<BlobMigrator>(dbInfo, ssi);
|
||||
Reference<BlobMigrator> self = makeReference<BlobMigrator>(dbInfo, interf);
|
||||
wait(BlobMigrator::start(self));
|
||||
} catch (Error& e) {
|
||||
fmt::print("unexpected blob migrator error {}\n", e.what());
|
||||
dprint("Unexpected blob migrator error {}\n", e.what());
|
||||
TraceEvent("BlobMigratorError", interf.id()).error(e);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -84,6 +84,15 @@ struct GranuleStartState {
|
|||
Optional<GranuleHistory> history;
|
||||
};
|
||||
|
||||
// TODO: add more (blob file request cost, in-memory mutations vs blob delta file, etc...)
|
||||
struct GranuleReadStats {
|
||||
int64_t deltaBytesRead;
|
||||
|
||||
void reset() { deltaBytesRead = 0; }
|
||||
|
||||
GranuleReadStats() { reset(); }
|
||||
};
|
||||
|
||||
struct GranuleMetadata : NonCopyable, ReferenceCounted<GranuleMetadata> {
|
||||
KeyRange keyRange;
|
||||
|
||||
|
@ -120,11 +129,74 @@ struct GranuleMetadata : NonCopyable, ReferenceCounted<GranuleMetadata> {
|
|||
|
||||
AssignBlobRangeRequest originalReq;
|
||||
|
||||
GranuleReadStats readStats;
|
||||
bool rdcCandidate;
|
||||
Promise<Void> runRDC;
|
||||
|
||||
void resume() {
|
||||
if (resumeSnapshot.canBeSet()) {
|
||||
resumeSnapshot.send(Void());
|
||||
}
|
||||
}
|
||||
|
||||
void resetReadStats() {
|
||||
rdcCandidate = false;
|
||||
readStats.reset();
|
||||
runRDC.reset();
|
||||
}
|
||||
|
||||
// determine eligibility (>1) and priority for re-snapshotting this granule
|
||||
double weightRDC() {
|
||||
// ratio of read amp to write amp that would be incurred by re-snapshotting now
|
||||
int64_t lastSnapshotSize = (files.snapshotFiles.empty()) ? 0 : files.snapshotFiles.back().length;
|
||||
int64_t minSnapshotSize = SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES / 2;
|
||||
lastSnapshotSize = std::max(minSnapshotSize, lastSnapshotSize);
|
||||
|
||||
int64_t writeAmp = lastSnapshotSize + bufferedDeltaBytes + bytesInNewDeltaFiles;
|
||||
// read amp is deltaBytesRead. Read amp must be READ_FACTOR times larger than write amp
|
||||
return (1.0 * readStats.deltaBytesRead) / (writeAmp * SERVER_KNOBS->BG_RDC_READ_FACTOR);
|
||||
}
|
||||
|
||||
bool isEligibleRDC() {
|
||||
// granule should be reasonably read-hot to be eligible
|
||||
int64_t bytesWritten = bufferedDeltaBytes + bytesInNewDeltaFiles;
|
||||
return bytesWritten * SERVER_KNOBS->BG_RDC_READ_FACTOR < readStats.deltaBytesRead;
|
||||
}
|
||||
|
||||
bool updateReadStats(Version readVersion, const BlobGranuleChunkRef& chunk) {
|
||||
// Only update stats for re-compacting for at-latest reads that have to do snapshot + delta merge
|
||||
if (!SERVER_KNOBS->BG_ENABLE_READ_DRIVEN_COMPACTION || !chunk.snapshotFile.present() ||
|
||||
pendingSnapshotVersion != durableSnapshotVersion.get() || readVersion <= pendingSnapshotVersion) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (chunk.newDeltas.empty() && chunk.deltaFiles.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
readStats.deltaBytesRead += chunk.newDeltas.expectedSize();
|
||||
for (auto& it : chunk.deltaFiles) {
|
||||
readStats.deltaBytesRead += it.length;
|
||||
}
|
||||
|
||||
if (rdcCandidate) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isEligibleRDC() && weightRDC() > 1.0) {
|
||||
rdcCandidate = true;
|
||||
CODE_PROBE(true, "Granule read triggering read-driven compaction");
|
||||
if (BW_DEBUG) {
|
||||
fmt::print("Triggering read-driven compaction of [{0} - {1})\n",
|
||||
keyRange.begin.printable(),
|
||||
keyRange.end.printable());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool doReadDrivenCompaction() { return runRDC.isSet(); }
|
||||
};
|
||||
|
||||
struct GranuleRangeMetadata {
|
||||
|
@ -200,6 +272,7 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted<BlobWorkerData> {
|
|||
NotifiedVersion grvVersion;
|
||||
Promise<Void> fatalError;
|
||||
Promise<Void> simInjectFailure;
|
||||
Promise<Void> doReadDrivenCompaction;
|
||||
|
||||
Reference<FlowLock> initialSnapshotLock;
|
||||
Reference<FlowLock> resnapshotLock;
|
||||
|
@ -293,6 +366,13 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted<BlobWorkerData> {
|
|||
return stats.estimatedMaxResidentMemory >= memoryFullThreshold;
|
||||
}
|
||||
|
||||
void triggerReadDrivenCompaction() {
|
||||
Promise<Void> doRDC = doReadDrivenCompaction;
|
||||
if (doRDC.canBeSet()) {
|
||||
doRDC.send(Void());
|
||||
}
|
||||
}
|
||||
|
||||
bool maybeInjectTargetedRestart() {
|
||||
// inject a BW restart at most once per test
|
||||
if (g_network->isSimulated() && !g_simulator->speedUpSimulation &&
|
||||
|
@ -1107,7 +1187,6 @@ ACTOR Future<BlobFileIndex> dumpInitialSnapshotFromFDB(Reference<BlobWorkerData>
|
|||
}
|
||||
retries++;
|
||||
CODE_PROBE(true, "Granule initial snapshot failed");
|
||||
// FIXME: why can't we supress error event?
|
||||
TraceEvent(retries < 10 ? SevDebug : SevWarn, "BlobGranuleInitialSnapshotRetry", bwData->id)
|
||||
.error(err)
|
||||
.detail("Granule", metadata->keyRange)
|
||||
|
@ -2043,6 +2122,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
metadata->pendingDeltaVersion = startVersion;
|
||||
metadata->bufferedDeltaVersion = startVersion;
|
||||
metadata->knownCommittedVersion = startVersion;
|
||||
metadata->resetReadStats();
|
||||
|
||||
Reference<ChangeFeedData> cfData = makeReference<ChangeFeedData>(bwData->db.getPtr());
|
||||
|
||||
|
@ -2185,6 +2265,10 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
}
|
||||
nextForceFlush = metadata->forceFlushVersion.whenAtLeast(lastForceFlushVersion + 1);
|
||||
}
|
||||
when(wait(metadata->runRDC.getFuture())) {
|
||||
// return control flow back to the triggering actor before continuing
|
||||
wait(delay(0));
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
// only error we should expect here is when we finish consuming old change feed
|
||||
|
@ -2311,6 +2395,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
startState.granuleID,
|
||||
inFlightFiles.empty() ? Future<Void>(Void())
|
||||
: success(inFlightFiles.back().future));
|
||||
metadata->resetReadStats();
|
||||
}
|
||||
// reset force flush state, requests should retry and add it back once feed is ready
|
||||
forceFlushVersions.clear();
|
||||
|
@ -2419,20 +2504,20 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
// The force flush contract is that a version cannot be put in forceFlushVersion unless the change feed
|
||||
// is already whenAtLeast that version
|
||||
bool forceFlush = !forceFlushVersions.empty() && forceFlushVersions.back() > metadata->pendingDeltaVersion;
|
||||
bool doReadDrivenFlush = !metadata->currentDeltas.empty() && metadata->doReadDrivenCompaction();
|
||||
CODE_PROBE(forceFlush, "Force flushing granule");
|
||||
if (metadata->bufferedDeltaBytes >= SERVER_KNOBS->BG_DELTA_FILE_TARGET_BYTES || forceFlush) {
|
||||
if (metadata->bufferedDeltaBytes >= SERVER_KNOBS->BG_DELTA_FILE_TARGET_BYTES || forceFlush ||
|
||||
doReadDrivenFlush) {
|
||||
TraceEvent(SevDebug, "BlobGranuleDeltaFile", bwData->id)
|
||||
.detail("Granule", metadata->keyRange)
|
||||
.detail("Version", lastDeltaVersion);
|
||||
|
||||
// sanity check for version order
|
||||
|
||||
if (forceFlush) {
|
||||
if (forceFlush || doReadDrivenFlush) {
|
||||
if (lastDeltaVersion == invalidVersion) {
|
||||
lastDeltaVersion = metadata->currentDeltas.empty() ? metadata->pendingDeltaVersion
|
||||
: metadata->currentDeltas.back().version;
|
||||
lastDeltaVersion = metadata->bufferedDeltaVersion;
|
||||
}
|
||||
if (lastDeltaVersion < forceFlushVersions.back()) {
|
||||
if (!forceFlushVersions.empty() && lastDeltaVersion < forceFlushVersions.back()) {
|
||||
if (BW_DEBUG) {
|
||||
fmt::print("Granule [{0} - {1}) force flushing delta version {2} -> {3}\n",
|
||||
metadata->keyRange.begin.printable(),
|
||||
|
@ -2444,13 +2529,6 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
}
|
||||
}
|
||||
if (!metadata->currentDeltas.empty()) {
|
||||
if (lastDeltaVersion < metadata->currentDeltas.back().version) {
|
||||
fmt::print("Granule [{0} - {1}) LDV {2} < DeltaBack {3}\n",
|
||||
metadata->keyRange.begin.printable(),
|
||||
metadata->keyRange.end.printable(),
|
||||
lastDeltaVersion,
|
||||
metadata->currentDeltas.back().version);
|
||||
}
|
||||
ASSERT(lastDeltaVersion >= metadata->currentDeltas.back().version);
|
||||
ASSERT(metadata->pendingDeltaVersion < metadata->currentDeltas.front().version);
|
||||
} else {
|
||||
|
@ -2507,6 +2585,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
// add new pending delta file
|
||||
ASSERT(metadata->pendingDeltaVersion < lastDeltaVersion);
|
||||
metadata->pendingDeltaVersion = lastDeltaVersion;
|
||||
ASSERT(metadata->bufferedDeltaVersion <= lastDeltaVersion);
|
||||
metadata->bufferedDeltaVersion = lastDeltaVersion; // In case flush was forced at non-mutation version
|
||||
metadata->bytesInNewDeltaFiles += metadata->bufferedDeltaBytes;
|
||||
|
||||
|
@ -2528,6 +2607,9 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
// Wait on delta file starting here. If we have too many pending delta file writes, we need to not
|
||||
// continue to consume from the change feed, as that will pile on even more delta files to write
|
||||
wait(startDeltaFileWrite);
|
||||
} else if (metadata->doReadDrivenCompaction()) {
|
||||
ASSERT(metadata->currentDeltas.empty());
|
||||
snapshotEligible = true;
|
||||
}
|
||||
|
||||
// FIXME: if we're still reading from old change feed, we should probably compact if we're
|
||||
|
@ -2535,7 +2617,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
// yet
|
||||
|
||||
// If we have enough delta files, try to re-snapshot
|
||||
if (snapshotEligible && metadata->bytesInNewDeltaFiles >= SERVER_KNOBS->BG_DELTA_BYTES_BEFORE_COMPACT) {
|
||||
if (snapshotEligible && (metadata->doReadDrivenCompaction() ||
|
||||
metadata->bytesInNewDeltaFiles >= SERVER_KNOBS->BG_DELTA_BYTES_BEFORE_COMPACT)) {
|
||||
if (BW_DEBUG && !inFlightFiles.empty()) {
|
||||
fmt::print("Granule [{0} - {1}) ready to re-snapshot at {2} after {3} > {4} bytes, "
|
||||
"waiting for "
|
||||
|
@ -2583,6 +2666,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
|
|||
|
||||
// reset metadata
|
||||
metadata->bytesInNewDeltaFiles = 0;
|
||||
metadata->resetReadStats();
|
||||
|
||||
// If we have more than one snapshot file and that file is unblocked (committedVersion >=
|
||||
// snapshotVersion), wait for it to finish
|
||||
|
@ -3740,6 +3824,11 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// don't update read stats on a summarize read
|
||||
if (metadata->updateReadStats(req.readVersion, chunk)) {
|
||||
bwData->triggerReadDrivenCompaction();
|
||||
}
|
||||
}
|
||||
|
||||
rep.chunks.push_back(rep.arena, chunk);
|
||||
|
@ -3961,7 +4050,7 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
|
|||
}
|
||||
}
|
||||
|
||||
if (createChangeFeed) {
|
||||
if (createChangeFeed && !isFullRestoreMode()) {
|
||||
// create new change feed for new version of granule
|
||||
wait(updateChangeFeed(
|
||||
&tr, granuleIDToCFKey(info.granuleID), ChangeFeedStatus::CHANGE_FEED_CREATE, req.keyRange));
|
||||
|
@ -4554,6 +4643,74 @@ ACTOR Future<Void> runGRVChecks(Reference<BlobWorkerData> bwData) {
|
|||
}
|
||||
}
|
||||
|
||||
struct RDCEntry {
|
||||
double weight;
|
||||
Reference<GranuleMetadata> granule;
|
||||
RDCEntry(double weight, Reference<GranuleMetadata> granule) : weight(weight), granule(granule) {}
|
||||
};
|
||||
|
||||
// for a top-k algorithm, we actually want a min-heap, so reverse the sort order
|
||||
struct OrderForTopK {
|
||||
// Orders entries by strictly greater weight. std::priority_queue exposes the element that
// compares last as top(), so "a > b" makes top() the entry with the smallest weight (min-heap).
// The comparison must be a strict weak ordering: returning a weight difference would convert
// to true for any two unequal weights in either order and leave the heap order undefined.
bool operator()(RDCEntry const& a, RDCEntry const& b) const { return a.weight > b.weight; }
|
||||
};
|
||||
|
||||
typedef std::priority_queue<RDCEntry, std::vector<RDCEntry>, OrderForTopK> TopKPQ;
|
||||
|
||||
ACTOR Future<Void> runReadDrivenCompaction(Reference<BlobWorkerData> bwData) {
|
||||
state bool processedAll = true;
|
||||
loop {
|
||||
if (processedAll) {
|
||||
wait(bwData->doReadDrivenCompaction.getFuture());
|
||||
bwData->doReadDrivenCompaction.reset();
|
||||
wait(delay(0));
|
||||
}
|
||||
|
||||
TopKPQ topK;
|
||||
|
||||
// FIXME: possible to scan candidates instead of all granules?
|
||||
int candidates = 0;
|
||||
auto allRanges = bwData->granuleMetadata.intersectingRanges(normalKeys);
|
||||
for (auto& it : allRanges) {
|
||||
if (it.value().activeMetadata.isValid() && it.value().activeMetadata->cancelled.canBeSet()) {
|
||||
auto metadata = it.value().activeMetadata;
|
||||
if (metadata->rdcCandidate && metadata->isEligibleRDC() && metadata->runRDC.canBeSet() &&
|
||||
metadata->pendingSnapshotVersion == metadata->durableSnapshotVersion.get()) {
|
||||
candidates++;
|
||||
double weight = metadata->weightRDC();
|
||||
if (weight > 1.0 &&
|
||||
(topK.size() < SERVER_KNOBS->BLOB_WORKER_RDC_PARALLELISM || weight > topK.top().weight)) {
|
||||
if (topK.size() == SERVER_KNOBS->BLOB_WORKER_RDC_PARALLELISM) {
|
||||
topK.pop();
|
||||
}
|
||||
topK.push(RDCEntry(weight, metadata));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CODE_PROBE(candidates > topK.size(), "Too many read-driven compaction candidates for one cycle");
|
||||
|
||||
std::vector<Future<Void>> futures;
|
||||
futures.reserve(topK.size());
|
||||
while (!topK.empty()) {
|
||||
++bwData->stats.readDrivenCompactions;
|
||||
Promise<Void> runRDC = topK.top().granule->runRDC;
|
||||
ASSERT(runRDC.canBeSet());
|
||||
Future<Void> waitForSnapshotComplete = topK.top().granule->durableSnapshotVersion.whenAtLeast(
|
||||
topK.top().granule->durableSnapshotVersion.get() + 1) ||
|
||||
topK.top().granule->cancelled.getFuture();
|
||||
futures.push_back(waitForSnapshotComplete);
|
||||
topK.pop();
|
||||
runRDC.send(Void());
|
||||
}
|
||||
processedAll = futures.empty();
|
||||
if (!futures.empty()) {
|
||||
// wait at least one second to throttle this actor a bit
|
||||
wait(waitForAll(futures) && delay(1.0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: better way to do this?
|
||||
// monitor system keyspace for new tenants
|
||||
ACTOR Future<Void> monitorTenants(Reference<BlobWorkerData> bwData) {
|
||||
|
@ -4891,6 +5048,7 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
|
|||
self->addActor.send(waitFailureServer(bwInterf.waitFailure.getFuture()));
|
||||
self->addActor.send(runGRVChecks(self));
|
||||
self->addActor.send(monitorTenants(self));
|
||||
self->addActor.send(runReadDrivenCompaction(self));
|
||||
state Future<Void> selfRemoved = monitorRemoval(self);
|
||||
if (g_network->isSimulated() && BUGGIFY_WITH_PROB(0.25)) {
|
||||
self->addActor.send(simForceFileWriteContention(self));
|
||||
|
@ -5024,13 +5182,22 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
|
|||
ASSERT(false);
|
||||
throw internal_error();
|
||||
}
|
||||
when(wait(selfRemoved || self->simInjectFailure.getFuture())) {
|
||||
when(wait(selfRemoved)) {
|
||||
if (BW_DEBUG) {
|
||||
printf("Blob worker detected removal. Exiting...\n");
|
||||
}
|
||||
TraceEvent("BlobWorkerRemoved", self->id);
|
||||
break;
|
||||
}
|
||||
when(wait(self->simInjectFailure.getFuture())) {
|
||||
// wait to let triggering actor finish to prevent weird shutdown races
|
||||
wait(delay(0));
|
||||
if (BW_DEBUG) {
|
||||
printf("Blob worker simulation injected failure. Exiting...\n");
|
||||
}
|
||||
TraceEvent("BlobWorkerSimRemoved", self->id);
|
||||
break;
|
||||
}
|
||||
when(wait(self->fatalError.getFuture())) {
|
||||
TraceEvent(SevError, "BlobWorkerActorCollectionFatalErrorNotError", self->id);
|
||||
ASSERT(false);
|
||||
|
|
|
@ -2615,8 +2615,9 @@ ACTOR Future<Void> monitorBlobMigrator(ClusterControllerData* self) {
|
|||
}
|
||||
loop {
|
||||
if (self->db.serverInfo->get().blobMigrator.present() && !self->recruitBlobMigrator.get()) {
|
||||
state Future<Void> wfClient = waitFailureClient(self->db.serverInfo->get().blobMigrator.get().waitFailure,
|
||||
SERVER_KNOBS->BLOB_MIGRATOR_FAILURE_TIME);
|
||||
state Future<Void> wfClient =
|
||||
waitFailureClient(self->db.serverInfo->get().blobMigrator.get().ssi.waitFailure,
|
||||
SERVER_KNOBS->BLOB_MIGRATOR_FAILURE_TIME);
|
||||
loop {
|
||||
choose {
|
||||
when(wait(wfClient)) {
|
||||
|
@ -3006,11 +3007,10 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
|
|||
self.addActor.send(monitorConsistencyScan(&self));
|
||||
self.addActor.send(metaclusterMetricsUpdater(&self));
|
||||
self.addActor.send(dbInfoUpdater(&self));
|
||||
self.addActor.send(traceCounters("ClusterControllerMetrics",
|
||||
self.id,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&self.clusterControllerMetrics,
|
||||
self.id.toString() + "/ClusterControllerMetrics"));
|
||||
self.addActor.send(self.clusterControllerMetrics.traceCounters("ClusterControllerMetrics",
|
||||
self.id,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
self.id.toString() + "/ClusterControllerMetrics"));
|
||||
self.addActor.send(traceRole(Role::CLUSTER_CONTROLLER, interf.id()));
|
||||
// printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str());
|
||||
|
||||
|
|
|
@ -183,8 +183,8 @@ class ConfigBroadcasterImpl {
|
|||
id(deterministicRandom()->randomUniqueID()), cc("ConfigBroadcaster"), compactRequest("CompactRequest", cc),
|
||||
successfulChangeRequest("SuccessfulChangeRequest", cc), failedChangeRequest("FailedChangeRequest", cc),
|
||||
snapshotRequest("SnapshotRequest", cc) {
|
||||
logger = traceCounters(
|
||||
"ConfigBroadcasterMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ConfigBroadcasterMetrics");
|
||||
logger = cc.traceCounters(
|
||||
"ConfigBroadcasterMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "ConfigBroadcasterMetrics");
|
||||
}
|
||||
|
||||
void addChanges(Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||
|
|
|
@ -812,7 +812,7 @@ public:
|
|||
successfulCommits("SuccessfulCommits", cc), failedCommits("FailedCommits", cc),
|
||||
setMutations("SetMutations", cc), clearMutations("ClearMutations", cc),
|
||||
getValueRequests("GetValueRequests", cc), getGenerationRequests("GetGenerationRequests", cc) {
|
||||
logger = traceCounters("ConfigNodeMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ConfigNode");
|
||||
logger = cc.traceCounters("ConfigNodeMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "ConfigNode");
|
||||
TraceEvent(SevInfo, "StartingConfigNode", id).detail("KVStoreAlreadyExists", kvStore.exists());
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "fdbclient/ReadYourWrites.h"
|
||||
#include "fdbclient/TagThrottle.actor.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/StorageMetrics.actor.h"
|
||||
#include "fdbserver/DataDistribution.actor.h"
|
||||
#include "fdbserver/RatekeeperInterface.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
|
@ -393,6 +393,7 @@ ACTOR Future<bool> checkDataConsistency(Database cx,
|
|||
state double rateLimiterStartTime = now();
|
||||
state int64_t bytesReadInthisRound = 0;
|
||||
state bool resume = !(restart || shuffleShards);
|
||||
state bool testResult = true;
|
||||
|
||||
state double dbSize = 100e12;
|
||||
if (g_network->isSimulated()) {
|
||||
|
@ -710,7 +711,7 @@ ACTOR Future<bool> checkDataConsistency(Database cx,
|
|||
(!storageServerInterfaces[j].isTss() &&
|
||||
!storageServerInterfaces[firstValidServer].isTss())) {
|
||||
testFailure("Data inconsistent", performQuiescentChecks, true);
|
||||
return false;
|
||||
testResult = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -949,7 +950,7 @@ ACTOR Future<bool> checkDataConsistency(Database cx,
|
|||
}
|
||||
|
||||
*bytesReadInPrevRound = bytesReadInthisRound;
|
||||
return true;
|
||||
return testResult;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> runDataValidationCheck(ConsistencyScanData* self) {
|
||||
|
|
|
@ -689,6 +689,17 @@ struct DDQueue : public IDDRelocationQueue {
|
|||
|
||||
int moveReusePhysicalShard;
|
||||
int moveCreateNewPhysicalShard;
|
||||
enum RetryFindDstReason {
|
||||
None = 0,
|
||||
RemoteBestTeamNotReady,
|
||||
PrimaryNoHealthyTeam,
|
||||
RemoteNoHealthyTeam,
|
||||
RemoteTeamIsFull,
|
||||
RemoteTeamIsNotHealthy,
|
||||
NoAvailablePhysicalShard,
|
||||
NumberOfTypes,
|
||||
};
|
||||
std::vector<int> retryFindDstReasonCount;
|
||||
|
||||
void startRelocation(int priority, int healthPriority) {
|
||||
// Although PRIORITY_TEAM_REDUNDANT has lower priority than split and merge shard movement,
|
||||
|
@ -754,7 +765,8 @@ struct DDQueue : public IDDRelocationQueue {
|
|||
suppressIntervals(0), rawProcessingUnhealthy(new AsyncVar<bool>(false)),
|
||||
rawProcessingWiggle(new AsyncVar<bool>(false)), unhealthyRelocations(0),
|
||||
movedKeyServersEventHolder(makeReference<EventCacheHolder>("MovedKeyServers")), moveReusePhysicalShard(0),
|
||||
moveCreateNewPhysicalShard(0) {}
|
||||
moveCreateNewPhysicalShard(0), retryFindDstReasonCount(static_cast<int>(RetryFindDstReason::NumberOfTypes), 0) {
|
||||
}
|
||||
DDQueue() = default;
|
||||
|
||||
void validate() {
|
||||
|
@ -1467,6 +1479,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
loop {
|
||||
destOverloadedCount = 0;
|
||||
stuckCount = 0;
|
||||
state DDQueue::RetryFindDstReason retryFindDstReason = DDQueue::RetryFindDstReason::None;
|
||||
// state int bestTeamStuckThreshold = 50;
|
||||
loop {
|
||||
state int tciIndex = 0;
|
||||
|
@ -1493,10 +1506,13 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
.detail("TeamCollectionIndex", tciIndex)
|
||||
.detail("RestoreDataMoveForDest",
|
||||
describe(tciIndex == 0 ? rd.dataMove->primaryDest : rd.dataMove->remoteDest));
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteBestTeamNotReady;
|
||||
foundTeams = false;
|
||||
break;
|
||||
}
|
||||
if (!bestTeam.first.present() || !bestTeam.first.get()->isHealthy()) {
|
||||
retryFindDstReason = tciIndex == 0 ? DDQueue::RetryFindDstReason::PrimaryNoHealthyTeam
|
||||
: DDQueue::RetryFindDstReason::RemoteNoHealthyTeam;
|
||||
foundTeams = false;
|
||||
break;
|
||||
}
|
||||
|
@ -1549,12 +1565,15 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
// getting the destination team or we could miss failure notifications for the storage
|
||||
// servers in the destination team
|
||||
TraceEvent("BestTeamNotReady");
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteBestTeamNotReady;
|
||||
foundTeams = false;
|
||||
break;
|
||||
}
|
||||
// If a DC has no healthy team, we stop checking the other DCs until
|
||||
// the unhealthy DC is healthy again or is excluded.
|
||||
if (!bestTeam.first.present()) {
|
||||
retryFindDstReason = tciIndex == 0 ? DDQueue::RetryFindDstReason::PrimaryNoHealthyTeam
|
||||
: DDQueue::RetryFindDstReason::RemoteNoHealthyTeam;
|
||||
foundTeams = false;
|
||||
break;
|
||||
}
|
||||
|
@ -1578,6 +1597,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
if (tciIndex == 1 && !forceToUseNewPhysicalShard) {
|
||||
// Keep the ratio as a floating-point value: storing it in a bool would collapse every
// non-zero ratio to 1, so the threshold comparison that follows could only succeed when the
// ratio is exactly 0.
double minAvailableSpaceRatio = bestTeam.first.get()->getMinAvailableSpaceRatio(true);
|
||||
if (minAvailableSpaceRatio < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsFull;
|
||||
foundTeams = false;
|
||||
break;
|
||||
}
|
||||
|
@ -1620,6 +1640,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD &&
|
||||
bestTeams.size() > 1 && !forceToUseNewPhysicalShard) {
|
||||
if (!bestTeams[1].first->isHealthy()) {
|
||||
retryFindDstReason = DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy;
|
||||
foundTeams = false;
|
||||
}
|
||||
}
|
||||
|
@ -1684,6 +1705,14 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
|
|||
self->moveReusePhysicalShard++;
|
||||
} else {
|
||||
self->moveCreateNewPhysicalShard++;
|
||||
if (retryFindDstReason == DDQueue::RetryFindDstReason::None) {
|
||||
// When creating a new physical shard, but the reason is none, this can only happen when
|
||||
// determinePhysicalShardIDGivenPrimaryTeam() finds that there is no available physical
|
||||
// shard.
|
||||
self->retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]++;
|
||||
} else {
|
||||
self->retryFindDstReasonCount[retryFindDstReason]++;
|
||||
}
|
||||
}
|
||||
rd.dataMoveId = newShardId(physicalShardIDCandidate, AssignEmptyRange::False);
|
||||
auto inFlightRange = self->inFlight.rangeContaining(rd.keys.begin);
|
||||
|
@ -2485,9 +2514,25 @@ ACTOR Future<Void> dataDistributionQueue(Reference<IDDTxnProcessor> db,
|
|||
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD) {
|
||||
TraceEvent("PhysicalShardMoveStats")
|
||||
.detail("MoveCreateNewPhysicalShard", self.moveCreateNewPhysicalShard)
|
||||
.detail("MoveReusePhysicalShard", self.moveReusePhysicalShard);
|
||||
.detail("MoveReusePhysicalShard", self.moveReusePhysicalShard)
|
||||
.detail("RemoteBestTeamNotReady",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteBestTeamNotReady])
|
||||
.detail("PrimaryNoHealthyTeam",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::PrimaryNoHealthyTeam])
|
||||
.detail("RemoteNoHealthyTeam",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteNoHealthyTeam])
|
||||
.detail("RemoteTeamIsFull",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteTeamIsFull])
|
||||
.detail("RemoteTeamIsNotHealthy",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::RemoteTeamIsNotHealthy])
|
||||
.detail(
|
||||
"NoAvailablePhysicalShard",
|
||||
self.retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]);
|
||||
self.moveCreateNewPhysicalShard = 0;
|
||||
self.moveReusePhysicalShard = 0;
|
||||
for (int i = 0; i < self.retryFindDstReasonCount.size(); ++i) {
|
||||
self.retryFindDstReasonCount[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
when(wait(self.error.getFuture())) {} // Propagate errors from dataDistributionRelocator
|
||||
|
|
|
@ -212,7 +212,7 @@ ShardSizeBounds calculateShardSizeBounds(const KeyRange& keys,
|
|||
const Reference<AsyncVar<Optional<ShardMetrics>>>& shardMetrics,
|
||||
const BandwidthStatus& bandwidthStatus,
|
||||
PromiseStream<KeyRange> readHotShard) {
|
||||
ShardSizeBounds bounds;
|
||||
ShardSizeBounds bounds = ShardSizeBounds::shardSizeBoundsBeforeTrack();
|
||||
if (shardMetrics->get().present()) {
|
||||
auto bytes = shardMetrics->get().get().metrics.bytes;
|
||||
auto readBandwidthStatus = getReadBandwidthStatus(shardMetrics->get().get().metrics);
|
||||
|
@ -259,21 +259,7 @@ ShardSizeBounds calculateShardSizeBounds(const KeyRange& keys,
|
|||
} else {
|
||||
ASSERT(false);
|
||||
}
|
||||
} else {
|
||||
bounds.max.bytes = -1;
|
||||
bounds.min.bytes = -1;
|
||||
bounds.permittedError.bytes = -1;
|
||||
bounds.max.bytesPerKSecond = bounds.max.infinity;
|
||||
bounds.min.bytesPerKSecond = 0;
|
||||
bounds.permittedError.bytesPerKSecond = bounds.permittedError.infinity;
|
||||
bounds.max.bytesReadPerKSecond = bounds.max.infinity;
|
||||
bounds.min.bytesReadPerKSecond = 0;
|
||||
bounds.permittedError.bytesReadPerKSecond = bounds.permittedError.infinity;
|
||||
}
|
||||
|
||||
bounds.max.iosPerKSecond = bounds.max.infinity;
|
||||
bounds.min.iosPerKSecond = 0;
|
||||
bounds.permittedError.iosPerKSecond = bounds.permittedError.infinity;
|
||||
return bounds;
|
||||
}
|
||||
|
||||
|
|
|
@ -895,7 +895,7 @@ public:
|
|||
if (maxPriority < SERVER_KNOBS->PRIORITY_TEAM_FAILED) {
|
||||
std::pair<std::vector<ShardsAffectedByTeamFailure::Team>,
|
||||
std::vector<ShardsAffectedByTeamFailure::Team>>
|
||||
teams = self->shardsAffectedByTeamFailure->getTeamsFor(shards[i]);
|
||||
teams = self->shardsAffectedByTeamFailure->getTeamsForFirstShard(shards[i]);
|
||||
for (int j = 0; j < teams.first.size() + teams.second.size(); j++) {
|
||||
// t is the team in primary DC or the remote DC
|
||||
auto& t =
|
||||
|
|
|
@ -763,7 +763,7 @@ std::vector<DDShardInfo> DDMockTxnProcessor::getDDShardInfos() const {
|
|||
KeyRangeRef curRange = it->range();
|
||||
DDShardInfo info(curRange.begin);
|
||||
|
||||
auto teams = mgs->shardMapping->getTeamsFor(curRange);
|
||||
auto teams = mgs->shardMapping->getTeamsForFirstShard(curRange);
|
||||
if (!teams.first.empty() && !teams.second.empty()) {
|
||||
CODE_PROBE(true, "Mock InitialDataDistribution In-Flight shard");
|
||||
info.hasDest = true;
|
||||
|
@ -816,7 +816,7 @@ Future<Void> DDMockTxnProcessor::removeStorageServer(const UID& serverID,
|
|||
const Optional<UID>& tssPairID,
|
||||
const MoveKeysLock& lock,
|
||||
const DDEnabledState* ddEnabledState) const {
|
||||
ASSERT(mgs->allShardRemovedFromServer(serverID));
|
||||
ASSERT(mgs->allShardsRemovedFromServer(serverID));
|
||||
mgs->allServers.erase(serverID);
|
||||
return Void();
|
||||
}
|
||||
|
@ -862,16 +862,14 @@ Future<HealthMetrics> DDMockTxnProcessor::getHealthMetrics(bool detailed) const
|
|||
return Future<HealthMetrics>();
|
||||
}
|
||||
|
||||
// FIXME: finish implementation
|
||||
Future<Standalone<VectorRef<KeyRef>>> DDMockTxnProcessor::splitStorageMetrics(
|
||||
const KeyRange& keys,
|
||||
const StorageMetrics& limit,
|
||||
const StorageMetrics& estimated,
|
||||
const Optional<int>& minSplitBytes) const {
|
||||
return Future<Standalone<VectorRef<KeyRef>>>();
|
||||
return mgs->splitStorageMetrics(keys, limit, estimated, minSplitBytes);
|
||||
}
|
||||
|
||||
// FIXME: finish implementation
|
||||
Future<std::pair<Optional<StorageMetrics>, int>> DDMockTxnProcessor::waitStorageMetrics(
|
||||
const KeyRange& keys,
|
||||
const StorageMetrics& min,
|
||||
|
@ -879,7 +877,7 @@ Future<std::pair<Optional<StorageMetrics>, int>> DDMockTxnProcessor::waitStorage
|
|||
const StorageMetrics& permittedError,
|
||||
int shardLimit,
|
||||
int expectedShardCount) const {
|
||||
return Future<std::pair<Optional<StorageMetrics>, int>>();
|
||||
return mgs->waitStorageMetrics(keys, min, max, permittedError, shardLimit, expectedShardCount);
|
||||
}
|
||||
|
||||
// FIXME: finish implementation
|
||||
|
@ -910,7 +908,7 @@ void DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params,
|
|||
ASSERT(params.finishMoveKeysParallelismLock->take().isReady());
|
||||
|
||||
// get source and dest teams
|
||||
auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsFor(params.keys);
|
||||
auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsForFirstShard(params.keys);
|
||||
|
||||
ASSERT_EQ(destTeams.size(), 0);
|
||||
if (destTeams.front() != ShardsAffectedByTeamFailure::Team{ params.destinationTeam, true }) {
|
||||
|
|
|
@ -53,6 +53,20 @@
|
|||
#include "fdbserver/DDSharedContext.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
ShardSizeBounds ShardSizeBounds::shardSizeBoundsBeforeTrack() {
|
||||
return ShardSizeBounds{
|
||||
.max = StorageMetrics{ .bytes = -1,
|
||||
.bytesPerKSecond = StorageMetrics::infinity,
|
||||
.iosPerKSecond = StorageMetrics::infinity,
|
||||
.bytesReadPerKSecond = StorageMetrics::infinity },
|
||||
.min = StorageMetrics{ .bytes = -1, .bytesPerKSecond = 0, .iosPerKSecond = 0, .bytesReadPerKSecond = 0 },
|
||||
.permittedError = StorageMetrics{ .bytes = -1,
|
||||
.bytesPerKSecond = StorageMetrics::infinity,
|
||||
.iosPerKSecond = StorageMetrics::infinity,
|
||||
.bytesReadPerKSecond = StorageMetrics::infinity }
|
||||
};
|
||||
}
|
||||
|
||||
struct DDAudit {
|
||||
DDAudit(UID id, KeyRange range, AuditType type)
|
||||
: id(id), range(range), type(type), auditMap(AuditPhase::Invalid, allKeys.end), actors(true) {}
|
||||
|
@ -286,8 +300,6 @@ public:
|
|||
PromiseStream<RelocateShard> relocationProducer, relocationConsumer;
|
||||
Reference<PhysicalShardCollection> physicalShardCollection;
|
||||
|
||||
StorageQuotaInfo storageQuotaInfo;
|
||||
|
||||
Promise<Void> initialized;
|
||||
|
||||
std::unordered_map<AuditType, std::vector<std::shared_ptr<DDAudit>>> audits;
|
||||
|
@ -542,27 +554,6 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
ACTOR Future<Void> storageQuotaTracker(Database cx, StorageQuotaInfo* storageQuotaInfo) {
|
||||
loop {
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
state RangeResult currentQuotas = wait(tr.getRange(storageQuotaKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
TraceEvent("StorageQuota_ReadCurrentQuotas").detail("Size", currentQuotas.size());
|
||||
for (auto const kv : currentQuotas) {
|
||||
Key const key = kv.key.removePrefix(storageQuotaPrefix);
|
||||
uint64_t const quota = BinaryReader::fromStringRef<uint64_t>(kv.value, Unversioned());
|
||||
storageQuotaInfo->quotaMap[key] = quota;
|
||||
}
|
||||
wait(delay(5.0));
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Periodically check and log the physicalShard status; clean up empty physicalShard;
|
||||
ACTOR Future<Void> monitorPhysicalShardStatus(Reference<PhysicalShardCollection> self) {
|
||||
ASSERT(SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA);
|
||||
|
@ -683,16 +674,15 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
|
|||
self->ddId,
|
||||
&normalDDQueueErrors()));
|
||||
|
||||
actors.push_back(reportErrorsExcept(storageQuotaTracker(cx, &self->storageQuotaInfo),
|
||||
"StorageQuotaTracker",
|
||||
self->ddId,
|
||||
&normalDDQueueErrors()));
|
||||
|
||||
if (ddIsTenantAware) {
|
||||
actors.push_back(reportErrorsExcept(ddTenantCache.get()->monitorTenantMap(),
|
||||
"DDTenantCacheMonitor",
|
||||
self->ddId,
|
||||
&normalDDQueueErrors()));
|
||||
actors.push_back(reportErrorsExcept(ddTenantCache.get()->monitorStorageQuota(),
|
||||
"StorageQuotaTracker",
|
||||
self->ddId,
|
||||
&normalDDQueueErrors()));
|
||||
actors.push_back(reportErrorsExcept(ddTenantCache.get()->monitorStorageUsage(),
|
||||
"StorageUsageTracker",
|
||||
self->ddId,
|
||||
|
|
|
@ -202,7 +202,8 @@ class GlobalTagThrottlerImpl {
|
|||
for (const auto& [id, _] : throughput) {
|
||||
result += getCurrentCost(id, tag).orDefault(0);
|
||||
}
|
||||
TraceEvent("GlobalTagThrottler_GetCurrentCost").detail("Tag", printable(tag)).detail("Cost", result);
|
||||
// FIXME: Disabled due to noisy trace events. Fix the noise and reenabled
|
||||
//TraceEvent("GlobalTagThrottler_GetCurrentCost").detail("Tag", printable(tag)).detail("Cost", result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -235,10 +236,13 @@ class GlobalTagThrottlerImpl {
|
|||
return 1.0;
|
||||
}
|
||||
auto const transactionRate = stats.get().getTransactionRate();
|
||||
// FIXME: Disabled due to noisy trace events. Fix the noise and reenabled
|
||||
/*
|
||||
TraceEvent("GlobalTagThrottler_GetAverageTransactionCost")
|
||||
.detail("Tag", tag)
|
||||
.detail("TransactionRate", transactionRate)
|
||||
.detail("Cost", cost);
|
||||
*/
|
||||
if (transactionRate == 0.0) {
|
||||
return 1.0;
|
||||
} else {
|
||||
|
|
|
@ -154,7 +154,7 @@ struct GrvProxyStats {
|
|||
return int64_t(100 * this->percentageOfBatchGRVQueueProcessed);
|
||||
});
|
||||
|
||||
logger = traceCounters("GrvProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "GrvProxyMetrics");
|
||||
logger = cc.traceCounters("GrvProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "GrvProxyMetrics");
|
||||
for (int i = 0; i < FLOW_KNOBS->BASIC_LOAD_BALANCE_BUCKETS; i++) {
|
||||
requestBuckets.push_back(0);
|
||||
}
|
||||
|
@ -459,9 +459,9 @@ void dropRequestFromQueue(Deque<GetReadVersionRequest>* queue, GrvProxyStats* st
|
|||
|
||||
// Put a GetReadVersion request into the queue corresponding to its priority.
|
||||
ACTOR Future<Void> queueGetReadVersionRequests(Reference<AsyncVar<ServerDBInfo> const> db,
|
||||
SpannedDeque<GetReadVersionRequest>* systemQueue,
|
||||
SpannedDeque<GetReadVersionRequest>* defaultQueue,
|
||||
SpannedDeque<GetReadVersionRequest>* batchQueue,
|
||||
Deque<GetReadVersionRequest>* systemQueue,
|
||||
Deque<GetReadVersionRequest>* defaultQueue,
|
||||
Deque<GetReadVersionRequest>* batchQueue,
|
||||
FutureStream<GetReadVersionRequest> readVersionRequests,
|
||||
PromiseStream<Void> GRVTimer,
|
||||
double* lastGRVTime,
|
||||
|
@ -531,7 +531,6 @@ ACTOR Future<Void> queueGetReadVersionRequests(Reference<AsyncVar<ServerDBInfo>
|
|||
stats->txnSystemPriorityStartIn += req.transactionCount;
|
||||
++stats->systemGRVQueueSize;
|
||||
systemQueue->push_back(req);
|
||||
// systemQueue->span.addParent(req.spanContext);
|
||||
} else if (req.priority >= TransactionPriority::DEFAULT) {
|
||||
++stats->txnRequestIn;
|
||||
stats->txnStartIn += req.transactionCount;
|
||||
|
@ -542,7 +541,6 @@ ACTOR Future<Void> queueGetReadVersionRequests(Reference<AsyncVar<ServerDBInfo>
|
|||
} else {
|
||||
defaultQueue->push_back(req);
|
||||
}
|
||||
// defaultQueue->span.addParent(req.spanContext);
|
||||
} else {
|
||||
// Return error for batch_priority GRV requests
|
||||
int64_t proxiesCount = std::max((int)db->get().client.grvProxies.size(), 1);
|
||||
|
@ -559,7 +557,6 @@ ACTOR Future<Void> queueGetReadVersionRequests(Reference<AsyncVar<ServerDBInfo>
|
|||
} else {
|
||||
batchQueue->push_back(req);
|
||||
}
|
||||
// batchQueue->span.addParent(req.spanContext);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -607,7 +604,7 @@ ACTOR Future<Void> lastCommitUpdater(GrvProxyData* self, PromiseStream<Future<Vo
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(SpanContext parentSpan,
|
||||
ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(std::vector<SpanContext> spanContexts,
|
||||
GrvProxyData* grvProxyData,
|
||||
uint32_t flags,
|
||||
Optional<UID> debugID,
|
||||
|
@ -620,7 +617,10 @@ ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(SpanContext parentSpan
|
|||
// before the request returns, so it is committed. (2) No proxy on our list reported committed a higher version
|
||||
// before this request was received, because then its committedVersion would have been higher,
|
||||
// and no other proxy could have already committed anything without first ending the epoch
|
||||
state Span span("GP:getLiveCommittedVersion"_loc, parentSpan);
|
||||
state Span span("GP:getLiveCommittedVersion"_loc);
|
||||
for (const SpanContext& spanContext : spanContexts) {
|
||||
span.addLink(spanContext);
|
||||
}
|
||||
++grvProxyData->stats.txnStartBatch;
|
||||
|
||||
state double grvStart = now();
|
||||
|
@ -826,15 +826,14 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
|
|||
state GrvTransactionRateInfo batchRateInfo(0);
|
||||
state GrvProxyTransactionTagThrottler tagThrottler;
|
||||
|
||||
state SpannedDeque<GetReadVersionRequest> systemQueue("GP:transactionStarterSystemQueue"_loc);
|
||||
state SpannedDeque<GetReadVersionRequest> defaultQueue("GP:transactionStarterDefaultQueue"_loc);
|
||||
state SpannedDeque<GetReadVersionRequest> batchQueue("GP:transactionStarterBatchQueue"_loc);
|
||||
state Deque<GetReadVersionRequest> systemQueue;
|
||||
state Deque<GetReadVersionRequest> defaultQueue;
|
||||
state Deque<GetReadVersionRequest> batchQueue;
|
||||
|
||||
state TransactionTagMap<uint64_t> transactionTagCounter;
|
||||
state PrioritizedTransactionTagMap<ClientTagThrottleLimits> clientThrottledTags;
|
||||
|
||||
state PromiseStream<double> normalGRVLatency;
|
||||
// state Span span;
|
||||
|
||||
state int64_t midShardSize = SERVER_KNOBS->MIN_SHARD_BYTES;
|
||||
getCurrentLineage()->modify(&TransactionLineage::operation) =
|
||||
|
@ -911,7 +910,7 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
|
|||
uint32_t defaultQueueSize = defaultQueue.size();
|
||||
uint32_t batchQueueSize = batchQueue.size();
|
||||
while (requestsToStart < SERVER_KNOBS->START_TRANSACTION_MAX_REQUESTS_TO_START) {
|
||||
SpannedDeque<GetReadVersionRequest>* transactionQueue;
|
||||
Deque<GetReadVersionRequest>* transactionQueue;
|
||||
if (!systemQueue.empty()) {
|
||||
transactionQueue = &systemQueue;
|
||||
} else if (!defaultQueue.empty()) {
|
||||
|
@ -921,7 +920,6 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
|
|||
} else {
|
||||
break;
|
||||
}
|
||||
// transactionQueue->span.swap(span);
|
||||
|
||||
auto& req = transactionQueue->front();
|
||||
int tc = req.transactionCount;
|
||||
|
@ -1017,7 +1015,13 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
|
|||
int batchGRVProcessed = 0;
|
||||
for (int i = 0; i < start.size(); i++) {
|
||||
if (start[i].size()) {
|
||||
Future<GetReadVersionReply> readVersionReply = getLiveCommittedVersion(SpanContext(),
|
||||
std::vector<SpanContext> spanContexts;
|
||||
spanContexts.reserve(start[i].size());
|
||||
for (const GetReadVersionRequest& request : start[i]) {
|
||||
spanContexts.push_back(request.spanContext);
|
||||
}
|
||||
|
||||
Future<GetReadVersionReply> readVersionReply = getLiveCommittedVersion(spanContexts,
|
||||
grvProxyData,
|
||||
i,
|
||||
debugID,
|
||||
|
@ -1041,7 +1045,6 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
|
|||
batchGRVProcessed += batchPriTransactionsStarted[i];
|
||||
}
|
||||
}
|
||||
// span = Span(span.location);
|
||||
|
||||
grvProxyData->stats.percentageOfDefaultGRVQueueProcessed =
|
||||
defaultQueueSize ? (double)defaultGRVProcessed / defaultQueueSize : 1;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbserver/GrvProxyTransactionTagThrottler.h"
|
||||
#include "flow/UnitTest.h"
|
||||
#include "flow/actorcompiler.h" // must be last include
|
||||
|
@ -28,6 +29,10 @@ void GrvProxyTransactionTagThrottler::DelayedRequest::updateProxyTagThrottledDur
|
|||
req.proxyTagThrottledDuration = now() - startTime;
|
||||
}
|
||||
|
||||
bool GrvProxyTransactionTagThrottler::DelayedRequest::isMaxThrottled() const {
|
||||
return now() - startTime > CLIENT_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION;
|
||||
}
|
||||
|
||||
void GrvProxyTransactionTagThrottler::TagQueue::setRate(double rate) {
|
||||
if (rateInfo.present()) {
|
||||
rateInfo.get().setRate(rate);
|
||||
|
@ -36,6 +41,20 @@ void GrvProxyTransactionTagThrottler::TagQueue::setRate(double rate) {
|
|||
}
|
||||
}
|
||||
|
||||
bool GrvProxyTransactionTagThrottler::TagQueue::isMaxThrottled() const {
|
||||
return !requests.empty() && requests.front().isMaxThrottled();
|
||||
}
|
||||
|
||||
void GrvProxyTransactionTagThrottler::TagQueue::rejectRequests() {
|
||||
CODE_PROBE(true, "GrvProxyTransactionTagThrottler rejecting requests");
|
||||
while (!requests.empty()) {
|
||||
auto& delayedReq = requests.front();
|
||||
delayedReq.updateProxyTagThrottledDuration();
|
||||
delayedReq.req.reply.sendError(proxy_tag_throttled());
|
||||
requests.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
void GrvProxyTransactionTagThrottler::updateRates(TransactionTagMap<double> const& newRates) {
|
||||
for (const auto& [tag, rate] : newRates) {
|
||||
auto it = queues.find(tag);
|
||||
|
@ -73,6 +92,7 @@ void GrvProxyTransactionTagThrottler::addRequest(GetReadVersionRequest const& re
|
|||
// SERVER_KNOBS->ENFORCE_TAG_THROTTLING_ON_PROXIES is enabled, there may be
|
||||
// unexpected behaviour, because only one tag is used for throttling.
|
||||
TraceEvent(SevWarnAlways, "GrvProxyTransactionTagThrottler_MultipleTags")
|
||||
.suppressFor(1.0)
|
||||
.detail("NumTags", req.tags.size())
|
||||
.detail("UsingTag", printable(tag));
|
||||
}
|
||||
|
@ -80,8 +100,8 @@ void GrvProxyTransactionTagThrottler::addRequest(GetReadVersionRequest const& re
|
|||
}
|
||||
|
||||
void GrvProxyTransactionTagThrottler::releaseTransactions(double elapsed,
|
||||
SpannedDeque<GetReadVersionRequest>& outBatchPriority,
|
||||
SpannedDeque<GetReadVersionRequest>& outDefaultPriority) {
|
||||
Deque<GetReadVersionRequest>& outBatchPriority,
|
||||
Deque<GetReadVersionRequest>& outDefaultPriority) {
|
||||
// Pointer to a TagQueue with some extra metadata stored alongside
|
||||
struct TagQueueHandle {
|
||||
// Store pointers here to avoid frequent std::unordered_map lookups
|
||||
|
@ -140,6 +160,11 @@ void GrvProxyTransactionTagThrottler::releaseTransactions(double elapsed,
|
|||
// Cannot release any more transaction from this tag (don't push the tag queue handle back into
|
||||
// pqOfQueues)
|
||||
CODE_PROBE(true, "GrvProxyTransactionTagThrottler throttling transaction");
|
||||
if (tagQueueHandle.queue->isMaxThrottled()) {
|
||||
// Requests in this queue have been throttled too long and errors
|
||||
// should be sent to clients.
|
||||
tagQueueHandle.queue->rejectRequests();
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
if (tagQueueHandle.nextSeqNo < nextQueueSeqNo) {
|
||||
|
@ -255,8 +280,8 @@ ACTOR static Future<Void> mockFifoClient(GrvProxyTransactionTagThrottler* thrott
|
|||
}
|
||||
|
||||
ACTOR static Future<Void> mockServer(GrvProxyTransactionTagThrottler* throttler) {
|
||||
state SpannedDeque<GetReadVersionRequest> outBatchPriority("TestGrvProxyTransactionTagThrottler_Batch"_loc);
|
||||
state SpannedDeque<GetReadVersionRequest> outDefaultPriority("TestGrvProxyTransactionTagThrottler_Default"_loc);
|
||||
state Deque<GetReadVersionRequest> outBatchPriority;
|
||||
state Deque<GetReadVersionRequest> outDefaultPriority;
|
||||
loop {
|
||||
state double elapsed = (0.009 + 0.002 * deterministicRandom()->random01());
|
||||
wait(delay(elapsed));
|
||||
|
@ -379,8 +404,8 @@ TEST_CASE("/GrvProxyTransactionTagThrottler/Cleanup2") {
|
|||
throttler.updateRates(TransactionTagMap<double>{});
|
||||
ASSERT_EQ(throttler.size(), 1);
|
||||
{
|
||||
SpannedDeque<GetReadVersionRequest> outBatchPriority("TestGrvProxyTransactionTagThrottler_Batch"_loc);
|
||||
SpannedDeque<GetReadVersionRequest> outDefaultPriority("TestGrvProxyTransactionTagThrottler_Default"_loc);
|
||||
Deque<GetReadVersionRequest> outBatchPriority;
|
||||
Deque<GetReadVersionRequest> outDefaultPriority;
|
||||
throttler.releaseTransactions(0.1, outBatchPriority, outDefaultPriority);
|
||||
}
|
||||
// Calling updates cleans up the queues in throttler
|
||||
|
|
|
@ -347,8 +347,8 @@ public:
|
|||
Randomize::False,
|
||||
g_network->isSimulated() ? IsSimulated::True : IsSimulated::False);
|
||||
}
|
||||
logger = traceCounters(
|
||||
"LocalConfigurationMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "LocalConfigurationMetrics");
|
||||
logger = cc.traceCounters(
|
||||
"LocalConfigurationMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "LocalConfigurationMetrics");
|
||||
}
|
||||
|
||||
Future<Void> addChanges(Standalone<VectorRef<VersionedConfigMutationRef>> changes,
|
||||
|
|
|
@ -190,15 +190,14 @@ struct LogRouterData {
|
|||
});
|
||||
specialCounter(cc, "Generation", [this]() { return this->generation; });
|
||||
specialCounter(cc, "ActivePeekStreams", [this]() { return this->activePeekStreams; });
|
||||
logger = traceCounters("LogRouterMetrics",
|
||||
dbgid,
|
||||
SERVER_KNOBS->WORKER_LOGGING_INTERVAL,
|
||||
&cc,
|
||||
"LogRouterMetrics",
|
||||
[this](TraceEvent& te) {
|
||||
te.detail("PrimaryPeekLocation", this->primaryPeekLocation);
|
||||
te.detail("RouterTag", this->routerTag.toString());
|
||||
});
|
||||
logger = cc.traceCounters("LogRouterMetrics",
|
||||
dbgid,
|
||||
SERVER_KNOBS->WORKER_LOGGING_INTERVAL,
|
||||
"LogRouterMetrics",
|
||||
[this](TraceEvent& te) {
|
||||
te.detail("PrimaryPeekLocation", this->primaryPeekLocation);
|
||||
te.detail("RouterTag", this->routerTag.toString());
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,623 @@
|
|||
/*
|
||||
* MockGlobalState.actor.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbserver/MockGlobalState.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbserver/DataDistribution.actor.h"
|
||||
#include "flow/actorcompiler.h"
|
||||
|
||||
class MockGlobalStateImpl {
|
||||
public:
|
||||
ACTOR static Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(MockGlobalState* mgs,
|
||||
KeyRange keys,
|
||||
StorageMetrics min,
|
||||
StorageMetrics max,
|
||||
StorageMetrics permittedError,
|
||||
int shardLimit,
|
||||
int expectedShardCount) {
|
||||
state TenantInfo tenantInfo;
|
||||
loop {
|
||||
auto locations = mgs->getKeyRangeLocations(tenantInfo,
|
||||
keys,
|
||||
shardLimit,
|
||||
Reverse::False,
|
||||
SpanContext(),
|
||||
Optional<UID>(),
|
||||
UseProvisionalProxies::False,
|
||||
0)
|
||||
.get();
|
||||
TraceEvent(SevDebug, "MGSWaitStorageMetrics").detail("Phase", "GetLocation");
|
||||
// NOTE(xwang): in native API, there's code handling the non-equal situation, but I think in mock world
|
||||
// there shouldn't have any delay to update the locations.
|
||||
ASSERT_EQ(expectedShardCount, locations.size());
|
||||
|
||||
Optional<StorageMetrics> res =
|
||||
wait(::waitStorageMetricsWithLocation(tenantInfo, keys, locations, min, max, permittedError));
|
||||
|
||||
if (res.present()) {
|
||||
return std::make_pair(res, -1);
|
||||
}
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
}
|
||||
|
||||
// SOMEDAY: reuse the NativeAPI implementation
|
||||
ACTOR static Future<Standalone<VectorRef<KeyRef>>> splitStorageMetrics(MockGlobalState* mgs,
|
||||
KeyRange keys,
|
||||
StorageMetrics limit,
|
||||
StorageMetrics estimated,
|
||||
Optional<int> minSplitBytes) {
|
||||
state TenantInfo tenantInfo;
|
||||
loop {
|
||||
state std::vector<KeyRangeLocationInfo> locations =
|
||||
mgs->getKeyRangeLocations(tenantInfo,
|
||||
keys,
|
||||
CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT,
|
||||
Reverse::False,
|
||||
SpanContext(),
|
||||
Optional<UID>(),
|
||||
UseProvisionalProxies::False,
|
||||
0)
|
||||
.get();
|
||||
|
||||
// Same solution to NativeAPI::splitStorageMetrics, wait some merge finished
|
||||
if (locations.size() == CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT) {
|
||||
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
|
||||
Optional<Standalone<VectorRef<KeyRef>>> results =
|
||||
wait(splitStorageMetricsWithLocations(locations, keys, limit, estimated, minSplitBytes));
|
||||
|
||||
if (results.present()) {
|
||||
return results.get();
|
||||
}
|
||||
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class MockStorageServerImpl {
|
||||
public:
|
||||
ACTOR static Future<Void> waitMetricsTenantAware(MockStorageServer* self, WaitMetricsRequest req) {
|
||||
if (req.tenantInfo.present() && req.tenantInfo.get().tenantId != TenantInfo::INVALID_TENANT) {
|
||||
// TODO(xwang) add support for tenant test, search for tenant entry
|
||||
Optional<TenantMapEntry> entry;
|
||||
Optional<Key> tenantPrefix = entry.map<Key>([](TenantMapEntry e) { return e.prefix; });
|
||||
if (tenantPrefix.present()) {
|
||||
UNREACHABLE();
|
||||
// req.keys = req.keys.withPrefix(tenantPrefix.get(), req.arena);
|
||||
}
|
||||
}
|
||||
|
||||
if (!self->isReadable(req.keys)) {
|
||||
self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
|
||||
} else {
|
||||
wait(self->metrics.waitMetrics(req, delayJittered(SERVER_KNOBS->STORAGE_METRIC_TIMEOUT)));
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
};
|
||||
|
||||
// Returns true when every shard in serverKeys that intersects `range` currently has `status`.
// Asserts that at least one shard intersects `range`.
bool MockStorageServer::allShardStatusEqual(KeyRangeRef range, MockShardStatus status) {
    auto shards = serverKeys.intersectingRanges(range);
    ASSERT(!shards.empty()); // at least the range is allKeys

    auto shard = shards.begin();
    while (shard != shards.end()) {
        if (shard->cvalue().status != status) {
            return false;
        }
        ++shard;
    }
    return true;
}
|
||||
|
||||
// Applies `status` to the shards covering `range`, first splitting shards that straddle the range
// boundaries so that `range` lines up with shard boundaries.
//
//  - If the first intersecting shard contains all of `range`, that shard is split three ways around
//    `range` and the function returns.
//    NOTE(review): `status` is not applied on this path - confirm that this is intended.
//  - Otherwise the first / last intersecting shards are split two ways at range.begin / range.end when
//    they extend beyond the range.
//  - Each shard contained in `range` is then updated: a transition accepted by isStatusTransitionValid
//    stores ShardInfo{ status, newSize }, where newSize is the summed size of all contained shards;
//    COMPLETED -> INFLIGHT is tolerated without change; any other transition is reported as a SevError
//    trace event.
//  - Finally the shards inside `range` are coalesced.
//
// `restrictSize` is forwarded unchanged to the splitting helpers.
void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status, bool restrictSize) {
    auto ranges = serverKeys.intersectingRanges(range);
    ASSERT(!ranges.empty());
    if (ranges.begin().range().contains(range)) {
        CODE_PROBE(true, "Implicitly split single shard to 3 pieces");
        threeWayShardSplitting(ranges.begin().range(), range, ranges.begin().cvalue().shardSize, restrictSize);
        return;
    }
    if (ranges.begin().begin() < range.begin) {
        CODE_PROBE(true, "Implicitly split begin range to 2 pieces");
        twoWayShardSplitting(ranges.begin().range(), range.begin, ranges.begin().cvalue().shardSize, restrictSize);
    }
    // NOTE(review): ranges.end() is dereferenced here and in the alignment assertion below; verify that
    // it designates the shard these checks are meant to inspect.
    if (ranges.end().end() > range.end) {
        CODE_PROBE(true, "Implicitly split end range to 2 pieces");
        twoWayShardSplitting(ranges.end().range(), range.end, ranges.end().cvalue().shardSize, restrictSize);
    }
    ranges = serverKeys.containedRanges(range);
    // now the boundary must be aligned
    ASSERT(ranges.begin().begin() == range.begin);
    ASSERT(ranges.end().end() == range.end);
    uint64_t newSize = 0;
    for (auto it = ranges.begin(); it != ranges.end(); ++it) {
        newSize += it->cvalue().shardSize;
    }
    for (auto it = ranges.begin(); it != ranges.end(); ++it) {
        auto oldStatus = it.value().status;
        if (isStatusTransitionValid(oldStatus, status)) {
            it.value() = ShardInfo{ status, newSize };
        } else if (oldStatus == MockShardStatus::COMPLETED && status == MockShardStatus::INFLIGHT) {
            CODE_PROBE(true, "Shard already on server");
        } else {
            // Report both ends of the requested range; "KeyEnd" previously repeated range.begin.
            TraceEvent(SevError, "MockShardStatusTransitionError")
                .detail("From", oldStatus)
                .detail("To", status)
                .detail("ID", id)
                .detail("KeyBegin", range.begin.toHexString())
                .detail("KeyEnd", range.end.toHexString());
        }
    }
    serverKeys.coalesce(range);
}
|
||||
|
||||
// split the out range [a, d) based on the inner range's boundary [b, c). The result would be [a,b), [b,c), [c,d). The
|
||||
// size of the new shards are randomly split from old size of [a, d)
|
||||
void MockStorageServer::threeWayShardSplitting(KeyRangeRef outerRange,
|
||||
KeyRangeRef innerRange,
|
||||
uint64_t outerRangeSize,
|
||||
bool restrictSize) {
|
||||
ASSERT(outerRange.contains(innerRange));
|
||||
|
||||
Key left = outerRange.begin;
|
||||
// random generate 3 shard sizes, the caller guarantee that the min, max parameters are always valid.
|
||||
int leftSize = deterministicRandom()->randomInt(
|
||||
SERVER_KNOBS->MIN_SHARD_BYTES,
|
||||
restrictSize ? outerRangeSize - 2 * SERVER_KNOBS->MIN_SHARD_BYTES + 1 : SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
int midSize = deterministicRandom()->randomInt(
|
||||
SERVER_KNOBS->MIN_SHARD_BYTES,
|
||||
restrictSize ? outerRangeSize - leftSize - SERVER_KNOBS->MIN_SHARD_BYTES + 1 : SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
int rightSize =
|
||||
restrictSize ? outerRangeSize - leftSize - midSize
|
||||
: deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
|
||||
serverKeys.insert(innerRange, { serverKeys[left].status, (uint64_t)midSize });
|
||||
serverKeys[left].shardSize = leftSize;
|
||||
serverKeys[innerRange.end].shardSize = rightSize;
|
||||
}
|
||||
|
||||
// split the range [a,c) with split point b. The result would be [a, b), [b, c). The
|
||||
// size of the new shards are randomly split from old size of [a, c)
|
||||
void MockStorageServer::twoWayShardSplitting(KeyRangeRef range,
|
||||
KeyRef splitPoint,
|
||||
uint64_t rangeSize,
|
||||
bool restrictSize) {
|
||||
Key left = range.begin;
|
||||
// random generate 3 shard sizes, the caller guarantee that the min, max parameters are always valid.
|
||||
int leftSize = deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES,
|
||||
restrictSize ? rangeSize - SERVER_KNOBS->MIN_SHARD_BYTES + 1
|
||||
: SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
int rightSize =
|
||||
restrictSize ? rangeSize - leftSize
|
||||
: deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
serverKeys.rawInsert(splitPoint, { serverKeys[left].status, (uint64_t)rightSize });
|
||||
serverKeys[left].shardSize = leftSize;
|
||||
}
|
||||
|
||||
// Erases `range` from serverKeys. Asserts that the first shard contained in `range` spans exactly
// `range`, i.e. that the caller passed a whole shard.
void MockStorageServer::removeShard(KeyRangeRef range) {
    auto contained = serverKeys.containedRanges(range);
    ASSERT(contained.begin().range() == range);
    serverKeys.rawErase(range);
}
|
||||
|
||||
// Returns the sum of shardSize over every shard in serverKeys that intersects `range`.
uint64_t MockStorageServer::sumRangeSize(KeyRangeRef range) const {
    auto shards = serverKeys.intersectingRanges(range);
    uint64_t sizeSum = 0;
    auto shard = shards.begin();
    while (shard != shards.end()) {
        sizeSum += shard->cvalue().shardSize;
        ++shard;
    }
    return sizeSum;
}
|
||||
|
||||
// Registers `future` with this server's `actors` collection.
void MockStorageServer::addActor(Future<Void> future) {
    actors.add(future);
}
|
||||
|
||||
// Placeholder handler: the request is currently ignored and no reply is sent.
void MockStorageServer::getSplitPoints(const SplitRangeRequest& req) {}
|
||||
|
||||
// Forwards the request to the actor implementation in MockStorageServerImpl, passing this server.
Future<Void> MockStorageServer::waitMetricsTenantAware(const WaitMetricsRequest& req) {
    return MockStorageServerImpl::waitMetricsTenantAware(this, req);
}
|
||||
|
||||
// Placeholder handler: the request is currently ignored and no reply is sent.
void MockStorageServer::getStorageMetrics(const GetStorageMetricsRequest& req) {}
|
||||
|
||||
// Prepares the server's interface and starts serving storage-metrics requests:
// assigns a LocalityData whose first and last fields are unset and whose two middle fields are fresh
// random unique-ID strings, initializes the interface endpoints, starts accepting requests, logs the
// server address, and returns the future of serveStorageMetricsRequests.
Future<Void> MockStorageServer::run() {
    const Optional<Standalone<StringRef>> unset;
    ssi.locality = LocalityData(unset,
                                Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
                                Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
                                unset);
    ssi.initEndpoints();
    ssi.startAcceptingRequests();
    TraceEvent("MockStorageServerStart").detail("Address", ssi.address());
    return serveStorageMetricsRequests(this, ssi);
}
|
||||
|
||||
void MockGlobalState::initializeAsEmptyDatabaseMGS(const DatabaseConfiguration& conf, uint64_t defaultDiskSpace) {
|
||||
ASSERT(conf.storageTeamSize > 0);
|
||||
configuration = conf;
|
||||
std::vector<UID> serverIds;
|
||||
for (int i = 1; i <= conf.storageTeamSize; ++i) {
|
||||
UID id = indexToUID(i);
|
||||
serverIds.push_back(id);
|
||||
allServers[id] = MockStorageServer(id, defaultDiskSpace);
|
||||
allServers[id].serverKeys.insert(allKeys, { MockShardStatus::COMPLETED, 0 });
|
||||
}
|
||||
shardMapping->assignRangeToTeams(allKeys, { Team(serverIds, true) });
|
||||
}
|
||||
|
||||
// Stores a new MockStorageServer built from `server` and `diskSpace` in allServers under server.id(),
// replacing any entry already stored under that id.
void MockGlobalState::addStorageServer(StorageServerInterface server, uint64_t diskSpace) {
    const auto serverId = server.id();
    allServers[serverId] = MockStorageServer(server, diskSpace);
}
|
||||
|
||||
bool MockGlobalState::serverIsSourceForShard(const UID& serverId, KeyRangeRef shard, bool inFlightShard) {
|
||||
if (!allServers.count(serverId))
|
||||
return false;
|
||||
|
||||
// check serverKeys
|
||||
auto& mss = allServers.at(serverId);
|
||||
if (!mss.allShardStatusEqual(shard, MockShardStatus::COMPLETED)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// check keyServers
|
||||
auto teams = shardMapping->getTeamsForFirstShard(shard);
|
||||
if (inFlightShard) {
|
||||
return std::any_of(teams.second.begin(), teams.second.end(), [&serverId](const Team& team) {
|
||||
return team.hasServer(serverId);
|
||||
});
|
||||
}
|
||||
return std::any_of(
|
||||
teams.first.begin(), teams.first.end(), [&serverId](const Team& team) { return team.hasServer(serverId); });
|
||||
}
|
||||
|
||||
bool MockGlobalState::serverIsDestForShard(const UID& serverId, KeyRangeRef shard) {
|
||||
if (!allServers.count(serverId))
|
||||
return false;
|
||||
|
||||
// check serverKeys
|
||||
auto& mss = allServers.at(serverId);
|
||||
if (!mss.allShardStatusEqual(shard, MockShardStatus::INFLIGHT)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// check keyServers
|
||||
auto teams = shardMapping->getTeamsForFirstShard(shard);
|
||||
return !teams.second.empty() && std::any_of(teams.first.begin(), teams.first.end(), [&serverId](const Team& team) {
|
||||
return team.hasServer(serverId);
|
||||
});
|
||||
}
|
||||
|
||||
bool MockGlobalState::allShardsRemovedFromServer(const UID& serverId) {
|
||||
return allServers.count(serverId) && shardMapping->getNumberOfShards(serverId) == 0;
|
||||
}
|
||||
|
||||
// Forwards all arguments, together with this mock state, to MockGlobalStateImpl::waitStorageMetrics.
Future<std::pair<Optional<StorageMetrics>, int>> MockGlobalState::waitStorageMetrics(
    const KeyRange& keys,
    const StorageMetrics& min,
    const StorageMetrics& max,
    const StorageMetrics& permittedError,
    int shardLimit,
    int expectedShardCount) {
    return MockGlobalStateImpl::waitStorageMetrics(
        this, keys, min, max, permittedError, shardLimit, expectedShardCount);
}
|
||||
|
||||
// Wraps each storage server interface in a ReferencedInterface and bundles them, in the given order,
// into a newly created LocationInfo.
Reference<LocationInfo> buildLocationInfo(const std::vector<StorageServerInterface>& interfaces) {
    using ServerRef = Reference<ReferencedInterface<StorageServerInterface>>;

    std::vector<ServerRef> wrapped;
    wrapped.reserve(interfaces.size());
    for (const auto& ssi : interfaces) {
        wrapped.emplace_back(makeReference<ReferencedInterface<StorageServerInterface>>(ssi));
    }

    return makeReference<LocationInfo>(wrapped);
}
|
||||
|
||||
// Resolves the storage servers responsible for `key` from the mock shard mapping.
// The servers come from the single team returned by shardMapping->getTeamsForFirstShard for the
// one-key range of `key`: the second team list when it is non-empty, the first list otherwise.
// The returned location covers just that one-key range.
// Backward lookups are not supported; `key` must be below allKeys.end.
// tenant, spanContext, debugID, useProvisionalProxies and version are not used by the mock.
Future<KeyRangeLocationInfo> MockGlobalState::getKeyLocation(TenantInfo tenant,
                                                             Key key,
                                                             SpanContext spanContext,
                                                             Optional<UID> debugID,
                                                             UseProvisionalProxies useProvisionalProxies,
                                                             Reverse isBackward,
                                                             Version version) {
    if (isBackward) {
        // DD never asks for a backward range.
        UNREACHABLE();
    }
    ASSERT(key < allKeys.end);

    GetKeyServerLocationsReply reply;
    KeyRange keyAsRange = singleKeyRange(key);
    auto teams = shardMapping->getTeamsForFirstShard(keyAsRange);
    auto& owningTeams = teams.second.empty() ? teams.first : teams.second;
    ASSERT_EQ(owningTeams.size(), 1);
    reply.results.emplace_back(keyAsRange, extractStorageServerInterfaces(owningTeams.front().servers));

    return KeyRangeLocationInfo(
        reply.tenantEntry,
        KeyRange(toPrefixRelativeRange(reply.results[0].first, reply.tenantEntry.prefix), reply.arena),
        buildLocationInfo(reply.results[0].second));
}
|
||||
|
||||
// Resolves the storage servers for up to `limit` shards intersecting `keys`, in shard-map order.
// For each shard the servers come from the single team returned by shardMapping->getTeamsFor for the
// shard's begin key: the second team list when it is non-empty, the first list otherwise.
// Each returned range is the shard's range clipped to `keys`.
// Reverse lookups are not supported; `keys` must be non-empty.
// tenant, spanContext, debugID, useProvisionalProxies and version are not used by the mock.
Future<std::vector<KeyRangeLocationInfo>> MockGlobalState::getKeyRangeLocations(
    TenantInfo tenant,
    KeyRange keys,
    int limit,
    Reverse reverse,
    SpanContext spanContext,
    Optional<UID> debugID,
    UseProvisionalProxies useProvisionalProxies,
    Version version) {

    if (reverse) {
        // DD never asks for a backward range. Uses the same guard as getKeyLocation.
        UNREACHABLE();
    }
    ASSERT(keys.begin < keys.end);

    GetKeyServerLocationsReply rep;
    auto ranges = shardMapping->intersectingRanges(keys);
    auto it = ranges.begin();
    for (int count = 0; it != ranges.end() && count < limit; ++it, ++count) {
        auto teamPair = shardMapping->getTeamsFor(it->begin());
        auto& srcTeam = teamPair.second.empty() ? teamPair.first : teamPair.second;
        ASSERT_EQ(srcTeam.size(), 1);
        rep.results.emplace_back(it->range(), extractStorageServerInterfaces(srcTeam.front().servers));
    }
    CODE_PROBE(it != ranges.end(), "getKeyRangeLocations is limited", probe::decoration::rare);

    std::vector<KeyRangeLocationInfo> results;
    results.reserve(rep.results.size());
    for (const auto& shard : rep.results) {
        results.emplace_back(rep.tenantEntry,
                             (toPrefixRelativeRange(shard.first, rep.tenantEntry.prefix) & keys),
                             buildLocationInfo(shard.second));
    }
    return results;
}
|
||||
|
||||
std::vector<StorageServerInterface> MockGlobalState::extractStorageServerInterfaces(const std::vector<UID>& ids) const {
|
||||
std::vector<StorageServerInterface> interfaces;
|
||||
for (auto& id : ids) {
|
||||
interfaces.emplace_back(allServers.at(id).ssi);
|
||||
}
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
// Forwards all arguments, together with this mock state, to MockGlobalStateImpl::splitStorageMetrics.
Future<Standalone<VectorRef<KeyRef>>> MockGlobalState::splitStorageMetrics(const KeyRange& keys,
                                                                           const StorageMetrics& limit,
                                                                           const StorageMetrics& estimated,
                                                                           const Optional<int>& minSplitBytes) {
    return MockGlobalStateImpl::splitStorageMetrics(this, keys, limit, estimated, minSplitBytes);
}
|
||||
|
||||
// Initializes an empty mock database from a generated configuration (simple config, minimum
// replication 3) and checks that each of the storageTeamSize servers is a source for allKeys and
// reports a total size of 0.
TEST_CASE("/MockGlobalState/initializeAsEmptyDatabaseMGS/SimpleThree") {
    BasicTestConfig basicConfig;
    basicConfig.simpleConfig = true;
    basicConfig.minimumReplication = 3;
    basicConfig.logAntiQuorum = 0;
    DatabaseConfiguration dbConf = generateNormalDatabaseConfiguration(basicConfig);
    TraceEvent("UnitTestDbConfig").detail("Config", dbConf.toString());

    auto mockState = std::make_shared<MockGlobalState>();
    mockState->initializeAsEmptyDatabaseMGS(dbConf);
    for (int index = 1; index <= dbConf.storageTeamSize; ++index) {
        auto serverId = MockGlobalState::indexToUID(index);
        std::cout << "Check server " << index << "\n";
        ASSERT(mockState->serverIsSourceForShard(serverId, allKeys));
        ASSERT(mockState->allServers.at(serverId).sumRangeSize(allKeys) == 0);
    }

    return Void();
}
|
||||
|
||||
struct MockGlobalStateTester {
|
||||
|
||||
// expectation [r0.begin, r0.end) => [r0.begin, x1), [x1, x2), [x2, r0.end)
|
||||
void testThreeWaySplitFirstRange(MockStorageServer& mss) {
|
||||
auto it = mss.serverKeys.ranges().begin();
|
||||
uint64_t oldSize =
|
||||
deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, std::numeric_limits<int>::max());
|
||||
MockShardStatus oldStatus = it.cvalue().status;
|
||||
it->value().shardSize = oldSize;
|
||||
KeyRangeRef outerRange = it->range();
|
||||
Key x1 = keyAfter(it->range().begin);
|
||||
Key x2 = keyAfter(x1);
|
||||
std::cout << "it->range.begin: " << it->range().begin.toHexString() << " size: " << oldSize << "\n";
|
||||
|
||||
mss.threeWayShardSplitting(outerRange, KeyRangeRef(x1, x2), oldSize, false);
|
||||
auto ranges = mss.serverKeys.containedRanges(outerRange);
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(outerRange.begin, x1));
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(x1, x2));
|
||||
ASSERT(ranges.begin().cvalue().status == oldStatus);
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(x2, outerRange.end));
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.empty());
|
||||
}
|
||||
|
||||
// expectation [r0.begin, r0.end) => [r0.begin, x1), [x1, r0.end)
|
||||
void testTwoWaySplitFirstRange(MockStorageServer& mss) {
|
||||
auto it = mss.serverKeys.nthRange(0);
|
||||
MockShardStatus oldStatus = it.cvalue().status;
|
||||
uint64_t oldSize =
|
||||
deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, std::numeric_limits<int>::max());
|
||||
it->value().shardSize = oldSize;
|
||||
KeyRangeRef outerRange = it->range();
|
||||
Key x1 = keyAfter(it->range().begin);
|
||||
std::cout << "it->range.begin: " << it->range().begin.toHexString() << " size: " << oldSize << "\n";
|
||||
|
||||
mss.twoWayShardSplitting(it->range(), x1, oldSize, false);
|
||||
auto ranges = mss.serverKeys.containedRanges(outerRange);
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(outerRange.begin, x1));
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(x1, outerRange.end));
|
||||
ASSERT(ranges.begin().cvalue().status == oldStatus);
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.empty());
|
||||
}
|
||||
|
||||
KeyRangeLocationInfo getKeyLocationInfo(KeyRef key, std::shared_ptr<MockGlobalState> mgs) {
|
||||
return mgs
|
||||
->getKeyLocation(
|
||||
TenantInfo(), key, SpanContext(), Optional<UID>(), UseProvisionalProxies::False, Reverse::False, 0)
|
||||
.get();
|
||||
}
|
||||
|
||||
std::vector<KeyRangeLocationInfo> getKeyRangeLocations(KeyRangeRef keys,
|
||||
int limit,
|
||||
std::shared_ptr<MockGlobalState> mgs) {
|
||||
return mgs
|
||||
->getKeyRangeLocations(TenantInfo(),
|
||||
keys,
|
||||
limit,
|
||||
Reverse::False,
|
||||
SpanContext(),
|
||||
Optional<UID>(),
|
||||
UseProvisionalProxies::False,
|
||||
0)
|
||||
.get();
|
||||
}
|
||||
};
|
||||
|
||||
// Exercises the three-way and two-way shard splitting helpers on the first server of an empty mock
// database (simple config, minimum replication 1). The server's key map is reset to a single empty
// COMPLETED shard between the two checks.
TEST_CASE("/MockGlobalState/MockStorageServer/SplittingFunctions") {
    BasicTestConfig basicConfig;
    basicConfig.simpleConfig = true;
    basicConfig.minimumReplication = 1;
    basicConfig.logAntiQuorum = 0;
    DatabaseConfiguration dbConf = generateNormalDatabaseConfiguration(basicConfig);
    TraceEvent("UnitTestDbConfig").detail("Config", dbConf.toString());

    auto mockState = std::make_shared<MockGlobalState>();
    mockState->initializeAsEmptyDatabaseMGS(dbConf);

    MockGlobalStateTester tester;
    auto& firstServer = mockState->allServers.at(MockGlobalState::indexToUID(1));

    std::cout << "Test 3-way splitting...\n";
    tester.testThreeWaySplitFirstRange(firstServer);

    std::cout << "Test 2-way splitting...\n";
    firstServer.serverKeys.insert(allKeys, { MockShardStatus::COMPLETED, 0 }); // reset to empty
    tester.testTwoWaySplitFirstRange(firstServer);

    return Void();
}
|
||||
|
||||
namespace {
|
||||
inline bool locationInfoEqualsToTeam(Reference<LocationInfo> loc, const std::vector<UID>& ids) {
|
||||
return loc->locations()->size() == ids.size() &&
|
||||
std::all_of(ids.begin(), ids.end(), [loc](const UID& id) { return loc->locations()->hasInterface(id); });
|
||||
}
|
||||
}; // namespace
|
||||
// Unit test: assigns three key ranges to two overlapping teams and verifies that the single-key
// and key-range location queries of MockGlobalStateTester report the expected teams.
TEST_CASE("/MockGlobalState/MockStorageServer/GetKeyLocations") {
	BasicTestConfig basicConfig;
	basicConfig.simpleConfig = true;
	basicConfig.minimumReplication = 1;
	basicConfig.logAntiQuorum = 0;
	DatabaseConfiguration databaseConfig = generateNormalDatabaseConfiguration(basicConfig);
	TraceEvent("UnitTestDbConfig").detail("Config", databaseConfig.toString());

	auto globalState = std::make_shared<MockGlobalState>();
	globalState->initializeAsEmptyDatabaseMGS(databaseConfig);
	// Add one more (empty) server on top of the ones created by the initialization.
	globalState->addStorageServer(
	    StorageServerInterface(globalState->indexToUID(globalState->allServers.size() + 1)));

	// Layout under test (n = number of servers):
	//   team 1 (UID 1, 2, ..., n-1): [begin, 1.0), [2.0, end)
	//   team 2 (UID 2, 3, ..., n):   [1.0, 2.0)
	ShardsAffectedByTeamFailure::Team firstTeam, secondTeam;
	const auto teamSize = globalState->allServers.size() - 1;
	for (int idx = 0; idx < teamSize; ++idx) {
		firstTeam.servers.emplace_back(globalState->indexToUID(idx + 1));
		secondTeam.servers.emplace_back(globalState->indexToUID(idx + 2));
	}

	Key keyOne = doubleToTestKey(1.0);
	Key keyTwo = doubleToTestKey(2.0);
	std::vector<KeyRangeRef> shardRanges{ KeyRangeRef(allKeys.begin, keyOne),
		                                  KeyRangeRef(keyOne, keyTwo),
		                                  KeyRangeRef(keyTwo, allKeys.end) };
	globalState->shardMapping->assignRangeToTeams(shardRanges[0], { firstTeam });
	globalState->shardMapping->assignRangeToTeams(shardRanges[1], { secondTeam });
	globalState->shardMapping->assignRangeToTeams(shardRanges[2], { firstTeam });

	MockGlobalStateTester locationTester;

	// Single-key lookups: 0.5 falls into team 1's first range, 1.3 into team 2's range.
	Key probeKey = doubleToTestKey(0.5);
	auto singleLocation = locationTester.getKeyLocationInfo(probeKey, globalState);
	ASSERT(locationInfoEqualsToTeam(singleLocation.locations, firstTeam.servers));

	probeKey = doubleToTestKey(1.3);
	singleLocation = locationTester.getKeyLocationInfo(probeKey, globalState);
	ASSERT(locationInfoEqualsToTeam(singleLocation.locations, secondTeam.servers));

	// Range lookup over [begin, 3.0) with a generous limit: expect team 1, team 2, team 1,
	// with the last range ending at the query end.
	probeKey = doubleToTestKey(3.0);
	auto rangeLocations = locationTester.getKeyRangeLocations(KeyRangeRef(allKeys.begin, probeKey), 100, globalState);
	ASSERT(rangeLocations.size() == 3);
	ASSERT(rangeLocations[0].range == shardRanges[0]);
	ASSERT(locationInfoEqualsToTeam(rangeLocations[0].locations, firstTeam.servers));
	ASSERT(rangeLocations[1].range == shardRanges[1]);
	ASSERT(locationInfoEqualsToTeam(rangeLocations[1].locations, secondTeam.servers));
	ASSERT(rangeLocations[2].range == KeyRangeRef(shardRanges[2].begin, probeKey));
	ASSERT(locationInfoEqualsToTeam(rangeLocations[2].locations, firstTeam.servers));

	// Same range with limit 2: only the first two locations (team 1, team 2) are expected.
	rangeLocations = locationTester.getKeyRangeLocations(KeyRangeRef(allKeys.begin, probeKey), 2, globalState);
	ASSERT(rangeLocations.size() == 2);
	ASSERT(rangeLocations[0].range == shardRanges[0]);
	ASSERT(locationInfoEqualsToTeam(rangeLocations[0].locations, firstTeam.servers));
	ASSERT(rangeLocations[1].range == shardRanges[1]);
	ASSERT(locationInfoEqualsToTeam(rangeLocations[1].locations, secondTeam.servers));

	return Void();
}
|
||||
|
||||
// Unit test: starts every mock storage server, seeds each byte sample with one 500000-byte entry,
// and checks the result of MockGlobalState::waitStorageMetrics over allKeys.
TEST_CASE("/MockGlobalState/MockStorageServer/WaitStorageMetricsRequest") {
	BasicTestConfig basicConfig;
	basicConfig.simpleConfig = true;
	basicConfig.minimumReplication = 1;
	basicConfig.logAntiQuorum = 0;
	DatabaseConfiguration databaseConfig = generateNormalDatabaseConfiguration(basicConfig);
	TraceEvent("WaitStorageMetricsRequestUnitTestConfig").detail("Config", databaseConfig.toString());

	state std::shared_ptr<MockGlobalState> mgs = std::make_shared<MockGlobalState>();
	mgs->initializeAsEmptyDatabaseMGS(databaseConfig);
	state ActorCollection actors;

	// The collection is passed to the lambda through a plain pointer to get around an ACTOR
	// syntax restriction.
	ActorCollection* actorsPtr = &actors;
	std::for_each(mgs->allServers.begin(), mgs->allServers.end(), [actorsPtr](auto& entry) {
		auto& server = entry.second;
		actorsPtr->add(server.run());
		IFailureMonitor::failureMonitor().setStatus(server.ssi.address(), FailureStatus(false));
		server.metrics.byteSample.sample.insert("something"_sr, 500000);
	});

	KeyRange queryRange = allKeys;
	ShardSizeBounds sizeBounds = ShardSizeBounds::shardSizeBoundsBeforeTrack();
	std::pair<Optional<StorageMetrics>, int> metricsResult = wait(
	    mgs->waitStorageMetrics(queryRange, sizeBounds.min, sizeBounds.max, sizeBounds.permittedError, 1, 1));
	// std::cout << "get result " << metricsResult.second << "\n";
	// std::cout << "get byte "<< metricsResult.first.get().bytes << "\n";

	// A valid result is expected to carry -1 as its second element.
	ASSERT_EQ(metricsResult.second, -1);
	// The byte count matches the single sample inserted above.
	ASSERT_EQ(metricsResult.first.get().bytes, 500000);
	return Void();
}
|
|
@ -1,281 +0,0 @@
|
|||
/*
|
||||
* MockGlobalState.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbserver/MockGlobalState.h"
|
||||
|
||||
// Returns true iff every shard of this server that intersects `range` currently has `status`.
// Asserts that at least one shard intersects `range`.
bool MockStorageServer::allShardStatusEqual(KeyRangeRef range, MockShardStatus status) {
	auto overlapping = serverKeys.intersectingRanges(range);
	ASSERT(!overlapping.empty()); // at least the range is allKeys

	for (auto shard : overlapping) {
		const bool matches = shard.cvalue().status == status;
		if (!matches) {
			return false;
		}
	}
	return true;
}
|
||||
|
||||
// Applies `status` to the shards of this server covered by `range`.
//
// - If one existing shard fully contains `range`, that shard is split three ways around `range`
//   and the function returns immediately after the split.
//   NOTE(review): `status` is not applied on this path -- confirm this is intended.
// - Otherwise the shards sticking out at the begin / end of `range` are split two ways so that the
//   shard boundaries align with `range`. The sizes of all contained shards are summed, and every
//   contained shard whose transition passes isStatusTransitionValid() is overwritten with
//   { status, summed size }. A COMPLETED -> INFLIGHT request is ignored (the shard is already on
//   the server); any other rejected transition is reported through a SevError trace event.
//   Finally serverKeys.coalesce(range) is called.
//
// `restrictSize` is forwarded unchanged to the splitting helpers.
void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status, bool restrictSize) {
	auto ranges = serverKeys.intersectingRanges(range);
	ASSERT(!ranges.empty());
	if (ranges.begin().range().contains(range)) {
		CODE_PROBE(true, "Implicitly split single shard to 3 pieces");
		threeWayShardSplitting(ranges.begin().range(), range, ranges.begin().cvalue().shardSize, restrictSize);
		return;
	}
	if (ranges.begin().begin() < range.begin) {
		CODE_PROBE(true, "Implicitly split begin range to 2 pieces");
		twoWayShardSplitting(ranges.begin().range(), range.begin, ranges.begin().cvalue().shardSize, restrictSize);
	}
	// NOTE(review): ranges.end() is also used below as the loop terminator, i.e. as the
	// past-the-end position of the intersecting ranges -- confirm it denotes the shard intended here.
	if (ranges.end().end() > range.end) {
		CODE_PROBE(true, "Implicitly split end range to 2 pieces");
		twoWayShardSplitting(ranges.end().range(), range.end, ranges.end().cvalue().shardSize, restrictSize);
	}
	ranges = serverKeys.containedRanges(range);
	// now the boundary must be aligned
	ASSERT(ranges.begin().begin() == range.begin);
	ASSERT(ranges.end().end() == range.end);

	// Total size of everything inside `range`; it is assigned to each updated shard below.
	uint64_t newSize = 0;
	for (auto it = ranges.begin(); it != ranges.end(); ++it) {
		newSize += it->cvalue().shardSize;
	}
	for (auto it = ranges.begin(); it != ranges.end(); ++it) {
		auto oldStatus = it.value().status;
		if (isStatusTransitionValid(oldStatus, status)) {
			it.value() = ShardInfo{ status, newSize };
		} else if (oldStatus == MockShardStatus::COMPLETED && status == MockShardStatus::INFLIGHT) {
			CODE_PROBE(true, "Shard already on server");
		} else {
			// Fix: "KeyEnd" used to log range.begin a second time, hiding the real end of the range.
			TraceEvent(SevError, "MockShardStatusTransitionError")
			    .detail("From", oldStatus)
			    .detail("To", status)
			    .detail("ID", id)
			    .detail("KeyBegin", range.begin.toHexString())
			    .detail("KeyEnd", range.end.toHexString());
		}
	}
	serverKeys.coalesce(range);
}
|
||||
|
||||
// split the out range [a, d) based on the inner range's boundary [b, c). The result would be [a,b), [b,c), [c,d). The
|
||||
// size of the new shards are randomly split from old size of [a, d)
|
||||
void MockStorageServer::threeWayShardSplitting(KeyRangeRef outerRange,
|
||||
KeyRangeRef innerRange,
|
||||
uint64_t outerRangeSize,
|
||||
bool restrictSize) {
|
||||
ASSERT(outerRange.contains(innerRange));
|
||||
|
||||
Key left = outerRange.begin;
|
||||
// random generate 3 shard sizes, the caller guarantee that the min, max parameters are always valid.
|
||||
int leftSize = deterministicRandom()->randomInt(
|
||||
SERVER_KNOBS->MIN_SHARD_BYTES,
|
||||
restrictSize ? outerRangeSize - 2 * SERVER_KNOBS->MIN_SHARD_BYTES + 1 : SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
int midSize = deterministicRandom()->randomInt(
|
||||
SERVER_KNOBS->MIN_SHARD_BYTES,
|
||||
restrictSize ? outerRangeSize - leftSize - SERVER_KNOBS->MIN_SHARD_BYTES + 1 : SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
int rightSize =
|
||||
restrictSize ? outerRangeSize - leftSize - midSize
|
||||
: deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
|
||||
serverKeys.insert(innerRange, { serverKeys[left].status, (uint64_t)midSize });
|
||||
serverKeys[left].shardSize = leftSize;
|
||||
serverKeys[innerRange.end].shardSize = rightSize;
|
||||
}
|
||||
|
||||
// split the range [a,c) with split point b. The result would be [a, b), [b, c). The
|
||||
// size of the new shards are randomly split from old size of [a, c)
|
||||
void MockStorageServer::twoWayShardSplitting(KeyRangeRef range,
|
||||
KeyRef splitPoint,
|
||||
uint64_t rangeSize,
|
||||
bool restrictSize) {
|
||||
Key left = range.begin;
|
||||
// random generate 3 shard sizes, the caller guarantee that the min, max parameters are always valid.
|
||||
int leftSize = deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES,
|
||||
restrictSize ? rangeSize - SERVER_KNOBS->MIN_SHARD_BYTES + 1
|
||||
: SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
int rightSize =
|
||||
restrictSize ? rangeSize - leftSize
|
||||
: deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES);
|
||||
serverKeys.rawInsert(splitPoint, { serverKeys[left].status, (uint64_t)rightSize });
|
||||
serverKeys[left].shardSize = leftSize;
|
||||
}
|
||||
|
||||
// Erases `range` from serverKeys. Asserts that the first shard contained in `range` is exactly
// `range`.
void MockStorageServer::removeShard(KeyRangeRef range) {
	auto contained = serverKeys.containedRanges(range);
	auto firstContained = contained.begin();
	ASSERT(firstContained.range() == range);
	serverKeys.rawErase(range);
}
|
||||
|
||||
// Returns the sum of shardSize over all shards of this server that intersect `range`.
uint64_t MockStorageServer::sumRangeSize(KeyRangeRef range) const {
	uint64_t accumulated = 0;
	auto overlapping = serverKeys.intersectingRanges(range);
	for (auto shard : overlapping) {
		accumulated += shard.cvalue().shardSize;
	}
	return accumulated;
}
|
||||
|
||||
void MockGlobalState::initializeAsEmptyDatabaseMGS(const DatabaseConfiguration& conf, uint64_t defaultDiskSpace) {
|
||||
ASSERT(conf.storageTeamSize > 0);
|
||||
configuration = conf;
|
||||
std::vector<UID> serverIds;
|
||||
for (int i = 1; i <= conf.storageTeamSize; ++i) {
|
||||
UID id = indexToUID(i);
|
||||
serverIds.push_back(id);
|
||||
allServers[id] = MockStorageServer(id, defaultDiskSpace);
|
||||
allServers[id].serverKeys.insert(allKeys, { MockShardStatus::COMPLETED, 0 });
|
||||
}
|
||||
shardMapping->assignRangeToTeams(allKeys, { Team(serverIds, true) });
|
||||
}
|
||||
|
||||
// Registers (or replaces) the mock storage server for `server`, keyed by server.id().
void MockGlobalState::addStorageServer(StorageServerInterface server, uint64_t diskSpace) {
	const UID serverId = server.id();
	allServers[serverId] = MockStorageServer(server, diskSpace);
}
|
||||
|
||||
bool MockGlobalState::serverIsSourceForShard(const UID& serverId, KeyRangeRef shard, bool inFlightShard) {
|
||||
if (!allServers.count(serverId))
|
||||
return false;
|
||||
|
||||
// check serverKeys
|
||||
auto& mss = allServers.at(serverId);
|
||||
if (!mss.allShardStatusEqual(shard, MockShardStatus::COMPLETED)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// check keyServers
|
||||
auto teams = shardMapping->getTeamsFor(shard);
|
||||
if (inFlightShard) {
|
||||
return std::any_of(teams.second.begin(), teams.second.end(), [&serverId](const Team& team) {
|
||||
return team.hasServer(serverId);
|
||||
});
|
||||
}
|
||||
return std::any_of(
|
||||
teams.first.begin(), teams.first.end(), [&serverId](const Team& team) { return team.hasServer(serverId); });
|
||||
}
|
||||
|
||||
bool MockGlobalState::serverIsDestForShard(const UID& serverId, KeyRangeRef shard) {
|
||||
if (!allServers.count(serverId))
|
||||
return false;
|
||||
|
||||
// check serverKeys
|
||||
auto& mss = allServers.at(serverId);
|
||||
if (!mss.allShardStatusEqual(shard, MockShardStatus::INFLIGHT)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// check keyServers
|
||||
auto teams = shardMapping->getTeamsFor(shard);
|
||||
return !teams.second.empty() && std::any_of(teams.first.begin(), teams.first.end(), [&serverId](const Team& team) {
|
||||
return team.hasServer(serverId);
|
||||
});
|
||||
}
|
||||
|
||||
bool MockGlobalState::allShardRemovedFromServer(const UID& serverId) {
|
||||
return allServers.count(serverId) && shardMapping->getNumberOfShards(serverId) == 0;
|
||||
}
|
||||
|
||||
// Unit test: after initializeAsEmptyDatabaseMGS with minimumReplication = 3, every server of the
// team must be a source for allKeys and must report a total range size of 0.
TEST_CASE("/MockGlobalState/initializeAsEmptyDatabaseMGS/SimpleThree") {
	BasicTestConfig basicConfig;
	basicConfig.simpleConfig = true;
	basicConfig.minimumReplication = 3;
	basicConfig.logAntiQuorum = 0;
	DatabaseConfiguration databaseConfig = generateNormalDatabaseConfiguration(basicConfig);
	TraceEvent("UnitTestDbConfig").detail("Config", databaseConfig.toString());

	auto globalState = std::make_shared<MockGlobalState>();
	globalState->initializeAsEmptyDatabaseMGS(databaseConfig);

	int serverIndex = 1;
	while (serverIndex <= databaseConfig.storageTeamSize) {
		auto serverId = MockGlobalState::indexToUID(serverIndex);
		std::cout << "Check server " << serverIndex << "\n";
		ASSERT(globalState->serverIsSourceForShard(serverId, allKeys));
		ASSERT(globalState->allServers.at(serverId).sumRangeSize(allKeys) == 0);
		++serverIndex;
	}

	return Void();
}
|
||||
|
||||
struct MockGlobalStateTester {
|
||||
|
||||
// expectation [r0.begin, r0.end) => [r0.begin, x1), [x1, x2), [x2, r0.end)
|
||||
void testThreeWaySplitFirstRange(MockStorageServer& mss) {
|
||||
auto it = mss.serverKeys.ranges().begin();
|
||||
uint64_t oldSize =
|
||||
deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, std::numeric_limits<int>::max());
|
||||
MockShardStatus oldStatus = it.cvalue().status;
|
||||
it->value().shardSize = oldSize;
|
||||
KeyRangeRef outerRange = it->range();
|
||||
Key x1 = keyAfter(it->range().begin);
|
||||
Key x2 = keyAfter(x1);
|
||||
std::cout << "it->range.begin: " << it->range().begin.toHexString() << " size: " << oldSize << "\n";
|
||||
|
||||
mss.threeWayShardSplitting(outerRange, KeyRangeRef(x1, x2), oldSize, false);
|
||||
auto ranges = mss.serverKeys.containedRanges(outerRange);
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(outerRange.begin, x1));
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(x1, x2));
|
||||
ASSERT(ranges.begin().cvalue().status == oldStatus);
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(x2, outerRange.end));
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.empty());
|
||||
}
|
||||
|
||||
// expectation [r0.begin, r0.end) => [r0.begin, x1), [x1, r0.end)
|
||||
void testTwoWaySplitFirstRange(MockStorageServer& mss) {
|
||||
auto it = mss.serverKeys.nthRange(0);
|
||||
MockShardStatus oldStatus = it.cvalue().status;
|
||||
uint64_t oldSize =
|
||||
deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, std::numeric_limits<int>::max());
|
||||
it->value().shardSize = oldSize;
|
||||
KeyRangeRef outerRange = it->range();
|
||||
Key x1 = keyAfter(it->range().begin);
|
||||
std::cout << "it->range.begin: " << it->range().begin.toHexString() << " size: " << oldSize << "\n";
|
||||
|
||||
mss.twoWayShardSplitting(it->range(), x1, oldSize, false);
|
||||
auto ranges = mss.serverKeys.containedRanges(outerRange);
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(outerRange.begin, x1));
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.begin().range() == KeyRangeRef(x1, outerRange.end));
|
||||
ASSERT(ranges.begin().cvalue().status == oldStatus);
|
||||
ranges.pop_front();
|
||||
ASSERT(ranges.empty());
|
||||
}
|
||||
};
|
||||
|
||||
// Unit test: runs the three-way and the two-way shard splitting checks of MockGlobalStateTester
// against server #1 of a MockGlobalState initialized with minimumReplication = 1.
TEST_CASE("/MockGlobalState/MockStorageServer/SplittingFunctions") {
	BasicTestConfig basicConfig;
	basicConfig.simpleConfig = true;
	basicConfig.minimumReplication = 1;
	basicConfig.logAntiQuorum = 0;
	DatabaseConfiguration databaseConfig = generateNormalDatabaseConfiguration(basicConfig);
	TraceEvent("UnitTestDbConfig").detail("Config", databaseConfig.toString());

	auto globalState = std::make_shared<MockGlobalState>();
	globalState->initializeAsEmptyDatabaseMGS(databaseConfig);
	auto& firstServer = globalState->allServers.at(MockGlobalState::indexToUID(1));
	MockGlobalStateTester splitTester;

	std::cout << "Test 3-way splitting...\n";
	splitTester.testThreeWaySplitFirstRange(firstServer);

	std::cout << "Test 2-way splitting...\n";
	// Reset the server to one empty COMPLETED shard spanning allKeys before the second scenario.
	firstServer.serverKeys.insert(allKeys, { MockShardStatus::COMPLETED, 0 });
	splitTester.testTwoWaySplitFirstRange(firstServer);

	return Void();
}
|
|
@ -447,10 +447,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
"Restored");
|
||||
addActor.send(traceRole(Role::TRANSACTION_LOG, interf.id()));
|
||||
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.id);
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.id);
|
||||
version.initMetric("TLog.Version"_sr, cc.id);
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.id);
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.getId());
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.getId());
|
||||
version.initMetric("TLog.Version"_sr, cc.getId());
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.getId());
|
||||
|
||||
specialCounter(cc, "Version", [this]() { return this->version.get(); });
|
||||
specialCounter(cc, "SharedBytesInput", [tLogData]() { return tLogData->bytesInput; });
|
||||
|
@ -1399,26 +1399,26 @@ ACTOR Future<Void> tLogCore(TLogData* self, Reference<LogData> logData) {
|
|||
logData->addActor.send(waitFailureServer(logData->tli.waitFailure.getFuture()));
|
||||
logData->addActor.send(logData->removed);
|
||||
// FIXME: update tlogMetrics to include new information, or possibly only have one copy for the shared instance
|
||||
logData->addActor.send(traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&logData->cc,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
logData->addActor.send(logData->cc.traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
|
||||
StorageBytes sbQueue = self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
StorageBytes sbQueue =
|
||||
self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
|
||||
logData->addActor.send(serveTLogInterface(self, logData->tli, logData, warningCollectorInput));
|
||||
|
||||
|
|
|
@ -533,10 +533,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
context);
|
||||
addActor.send(traceRole(Role::TRANSACTION_LOG, interf.id()));
|
||||
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.id);
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.id);
|
||||
version.initMetric("TLog.Version"_sr, cc.id);
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.id);
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.getId());
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.getId());
|
||||
version.initMetric("TLog.Version"_sr, cc.getId());
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.getId());
|
||||
|
||||
specialCounter(cc, "Version", [this]() { return this->version.get(); });
|
||||
specialCounter(cc, "QueueCommittedVersion", [this]() { return this->queueCommittedVersion.get(); });
|
||||
|
@ -2212,26 +2212,26 @@ ACTOR Future<Void> tLogCore(TLogData* self,
|
|||
logData->addActor.send(waitFailureServer(tli.waitFailure.getFuture()));
|
||||
logData->addActor.send(logData->removed);
|
||||
// FIXME: update tlogMetrics to include new information, or possibly only have one copy for the shared instance
|
||||
logData->addActor.send(traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&logData->cc,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
logData->addActor.send(logData->cc.traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
|
||||
StorageBytes sbQueue = self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
StorageBytes sbQueue =
|
||||
self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
|
||||
logData->addActor.send(serveTLogInterface(self, tli, logData, warningCollectorInput));
|
||||
logData->addActor.send(cleanupPeekTrackers(logData.getPtr()));
|
||||
|
|
|
@ -616,10 +616,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
context);
|
||||
addActor.send(traceRole(Role::TRANSACTION_LOG, interf.id()));
|
||||
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.id);
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.id);
|
||||
version.initMetric("TLog.Version"_sr, cc.id);
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.id);
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.getId());
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.getId());
|
||||
version.initMetric("TLog.Version"_sr, cc.getId());
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.getId());
|
||||
|
||||
specialCounter(cc, "Version", [this]() { return this->version.get(); });
|
||||
specialCounter(cc, "QueueCommittedVersion", [this]() { return this->queueCommittedVersion.get(); });
|
||||
|
@ -2671,26 +2671,26 @@ ACTOR Future<Void> tLogCore(TLogData* self,
|
|||
logData->addActor.send(waitFailureServer(tli.waitFailure.getFuture()));
|
||||
logData->addActor.send(logData->removed);
|
||||
// FIXME: update tlogMetrics to include new information, or possibly only have one copy for the shared instance
|
||||
logData->addActor.send(traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&logData->cc,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
logData->addActor.send(logData->cc.traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
|
||||
StorageBytes sbQueue = self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
StorageBytes sbQueue =
|
||||
self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
|
||||
logData->addActor.send(serveTLogInterface(self, tli, logData, warningCollectorInput));
|
||||
logData->addActor.send(cleanupPeekTrackers(logData.getPtr()));
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#include "fdbserver/ResolverInterface.h"
|
||||
#include "fdbserver/RestoreUtil.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/StorageMetrics.actor.h"
|
||||
#include "fdbserver/WaitFailure.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
|
@ -188,7 +188,7 @@ struct Resolver : ReferenceCounted<Resolver> {
|
|||
specialCounter(cc, "NeededVersion", [this]() { return this->neededVersion.get(); });
|
||||
specialCounter(cc, "TotalStateBytes", [this]() { return this->totalStateBytes.get(); });
|
||||
|
||||
logger = traceCounters("ResolverMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ResolverMetrics");
|
||||
logger = cc.traceCounters("ResolverMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "ResolverMetrics");
|
||||
}
|
||||
~Resolver() { destroyConflictSet(conflictSet); }
|
||||
};
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include "fdbserver/RestoreLoader.actor.h"
|
||||
#include "fdbserver/RestoreRoleCommon.actor.h"
|
||||
#include "fdbserver/MutationTracking.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/StorageMetrics.actor.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
|
@ -405,10 +405,6 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
|
|||
.detail("Offset", asset.offset)
|
||||
.detail("Length", asset.len);
|
||||
|
||||
// Ensure data blocks in the same file are processed in order
|
||||
wait(processedFileOffset->whenAtLeast(asset.offset));
|
||||
ASSERT(processedFileOffset->get() == asset.offset);
|
||||
|
||||
state Arena tempArena;
|
||||
state StringRefReader reader(buf, restore_corrupted_data());
|
||||
try {
|
||||
|
@ -430,8 +426,9 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
|
|||
const uint8_t* message = reader.consume(msgSize);
|
||||
|
||||
// Skip mutations out of the version range
|
||||
if (!asset.isInVersionRange(msgVersion.version))
|
||||
if (!asset.isInVersionRange(msgVersion.version)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
state VersionedMutationsMap::iterator it;
|
||||
bool inserted;
|
||||
|
@ -452,6 +449,7 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
|
|||
// Skip mutation whose commitVesion < range kv's version
|
||||
if (logMutationTooOld(pRangeVersions, mutation, msgVersion.version)) {
|
||||
cc->oldLogMutations += 1;
|
||||
wait(yield()); // avoid potential stack overflows
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -459,6 +457,7 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
|
|||
if (mutation.param1 >= asset.range.end ||
|
||||
(isRangeMutation(mutation) && mutation.param2 < asset.range.begin) ||
|
||||
(!isRangeMutation(mutation) && mutation.param1 < asset.range.begin)) {
|
||||
wait(yield()); // avoid potential stack overflows
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -509,7 +508,6 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
|
|||
.detail("BlockLen", asset.len);
|
||||
throw;
|
||||
}
|
||||
processedFileOffset->set(asset.offset + asset.len);
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -526,8 +524,19 @@ ACTOR static Future<Void> parsePartitionedLogFileOnLoader(
|
|||
state int readFileRetries = 0;
|
||||
loop {
|
||||
try {
|
||||
// Ensure data blocks in the same file are processed in order
|
||||
wait(processedFileOffset->whenAtLeast(asset.offset));
|
||||
ASSERT(processedFileOffset->get() == asset.offset);
|
||||
|
||||
wait(_parsePartitionedLogFileOnLoader(
|
||||
pRangeVersions, processedFileOffset, kvOpsIter, samplesIter, cc, bc, asset, cx));
|
||||
processedFileOffset->set(asset.offset + asset.len);
|
||||
|
||||
TraceEvent("FastRestoreLoaderDecodingLogFileDone")
|
||||
.detail("BatchIndex", asset.batchIndex)
|
||||
.detail("Filename", asset.filename)
|
||||
.detail("Offset", asset.offset)
|
||||
.detail("Length", asset.len);
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_restore_bad_read || e.code() == error_code_restore_unsupported_file_version ||
|
||||
|
|
|
@ -40,10 +40,16 @@ int ShardsAffectedByTeamFailure::getNumberOfShards(UID ssID) const {
|
|||
}
|
||||
|
||||
std::pair<std::vector<ShardsAffectedByTeamFailure::Team>, std::vector<ShardsAffectedByTeamFailure::Team>>
|
||||
ShardsAffectedByTeamFailure::getTeamsFor(KeyRangeRef keys) {
|
||||
ShardsAffectedByTeamFailure::getTeamsForFirstShard(KeyRangeRef keys) {
|
||||
return shard_teams[keys.begin];
|
||||
}
|
||||
|
||||
// Returns a copy of the pair of team lists that shard_teams holds for `key`.
std::pair<std::vector<ShardsAffectedByTeamFailure::Team>, std::vector<ShardsAffectedByTeamFailure::Team>>
ShardsAffectedByTeamFailure::getTeamsFor(KeyRef key) {
	auto teamsAtKey = shard_teams[key];
	return teamsAtKey;
}
|
||||
|
||||
void ShardsAffectedByTeamFailure::erase(Team team, KeyRange const& range) {
|
||||
DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureErase")
|
||||
.detail("Range", range)
|
||||
|
@ -236,3 +242,7 @@ void ShardsAffectedByTeamFailure::removeFailedServerForRange(KeyRangeRef keys, c
|
|||
}
|
||||
check();
|
||||
}
|
||||
|
||||
// Read-only accessor: forwards `keyRange` to shard_teams.intersectingRanges() and returns its result.
auto ShardsAffectedByTeamFailure::intersectingRanges(KeyRangeRef keyRange) const -> decltype(shard_teams)::ConstRanges {
	auto overlapping = shard_teams.intersectingRanges(keyRange);
	return overlapping;
}
|
||||
|
|
|
@ -166,8 +166,8 @@ public:
|
|||
successfulChangeRequest("SuccessfulChangeRequest", cc), failedChangeRequest("FailedChangeRequest", cc),
|
||||
snapshotRequest("SnapshotRequest", cc) {
|
||||
cfi = getConfigFollowerInterface(configSource);
|
||||
logger = traceCounters(
|
||||
"ConfigConsumerMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ConfigConsumerMetrics");
|
||||
logger = cc.traceCounters(
|
||||
"ConfigConsumerMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "ConfigConsumerMetrics");
|
||||
}
|
||||
|
||||
Future<Void> consume(ConfigBroadcaster& broadcaster) {
|
||||
|
|
|
@ -248,9 +248,9 @@ public:
|
|||
lastTLogVersion(0), lastVersionWithData(0), peekVersion(0), compactionInProgress(Void()),
|
||||
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES), debug_inApplyUpdate(false),
|
||||
debug_lastValidateTime(0), versionLag(0), behind(false), counters(this) {
|
||||
version.initMetric("StorageCacheData.Version"_sr, counters.cc.id);
|
||||
desiredOldestVersion.initMetric("StorageCacheData.DesriedOldestVersion"_sr, counters.cc.id);
|
||||
oldestVersion.initMetric("StorageCacheData.OldestVersion"_sr, counters.cc.id);
|
||||
version.initMetric("StorageCacheData.Version"_sr, counters.cc.getId());
|
||||
desiredOldestVersion.initMetric("StorageCacheData.DesriedOldestVersion"_sr, counters.cc.getId());
|
||||
oldestVersion.initMetric("StorageCacheData.OldestVersion"_sr, counters.cc.getId());
|
||||
|
||||
newestAvailableVersion.insert(allKeys, invalidVersion);
|
||||
newestDirtyVersion.insert(allKeys, invalidVersion);
|
||||
|
@ -2224,11 +2224,10 @@ ACTOR Future<Void> storageCacheServer(StorageServerInterface ssi,
|
|||
self.ck = cacheKeysPrefixFor(id).withPrefix(systemKeys.begin); // FFFF/02cacheKeys/[this server]/
|
||||
|
||||
actors.add(waitFailureServer(ssi.waitFailure.getFuture()));
|
||||
actors.add(traceCounters("CacheMetrics",
|
||||
self.thisServerID,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&self.counters.cc,
|
||||
self.thisServerID.toString() + "/CacheMetrics"));
|
||||
actors.add(self.counters.cc.traceCounters("CacheMetrics",
|
||||
self.thisServerID,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
self.thisServerID.toString() + "/CacheMetrics"));
|
||||
|
||||
// fetch already cached ranges from the database and apply them before proceeding
|
||||
wait(storageCacheStartUpWarmup(&self));
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
*/
|
||||
|
||||
#include "flow/UnitTest.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/StorageMetrics.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
int64_t StorageMetricSample::getEstimate(KeyRangeRef keys) const {
|
||||
|
|
|
@ -652,10 +652,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
context);
|
||||
addActor.send(traceRole(Role::TRANSACTION_LOG, interf.id()));
|
||||
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.id);
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.id);
|
||||
version.initMetric("TLog.Version"_sr, cc.id);
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.id);
|
||||
persistentDataVersion.init("TLog.PersistentDataVersion"_sr, cc.getId());
|
||||
persistentDataDurableVersion.init("TLog.PersistentDataDurableVersion"_sr, cc.getId());
|
||||
version.initMetric("TLog.Version"_sr, cc.getId());
|
||||
queueCommittedVersion.initMetric("TLog.QueueCommittedVersion"_sr, cc.getId());
|
||||
|
||||
specialCounter(cc, "Version", [this]() { return this->version.get(); });
|
||||
specialCounter(cc, "QueueCommittedVersion", [this]() { return this->queueCommittedVersion.get(); });
|
||||
|
@ -2930,26 +2930,26 @@ ACTOR Future<Void> tLogCore(TLogData* self,
|
|||
logData->addActor.send(waitFailureServer(tli.waitFailure.getFuture()));
|
||||
logData->addActor.send(logData->removed);
|
||||
// FIXME: update tlogMetrics to include new information, or possibly only have one copy for the shared instance
|
||||
logData->addActor.send(traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&logData->cc,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
logData->addActor.send(logData->cc.traceCounters("TLogMetrics",
|
||||
logData->logId,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
logData->logId.toString() + "/TLogMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
StorageBytes sbTlog = self->persistentData->getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sbTlog.used);
|
||||
te.detail("KvstoreBytesFree", sbTlog.free);
|
||||
te.detail("KvstoreBytesAvailable", sbTlog.available);
|
||||
te.detail("KvstoreBytesTotal", sbTlog.total);
|
||||
te.detail("KvstoreBytesTemp", sbTlog.temp);
|
||||
|
||||
StorageBytes sbQueue = self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
StorageBytes sbQueue =
|
||||
self->rawPersistentQueue->getStorageBytes();
|
||||
te.detail("QueueDiskBytesUsed", sbQueue.used);
|
||||
te.detail("QueueDiskBytesFree", sbQueue.free);
|
||||
te.detail("QueueDiskBytesAvailable", sbQueue.available);
|
||||
te.detail("QueueDiskBytesTotal", sbQueue.total);
|
||||
te.detail("QueueDiskBytesTemp", sbQueue.temp);
|
||||
}));
|
||||
|
||||
logData->addActor.send(serveTLogInterface(self, tli, logData, warningCollectorInput));
|
||||
logData->addActor.send(cleanupPeekTrackers(logData.getPtr()));
|
||||
|
|
|
@ -122,19 +122,20 @@ public:
|
|||
ACTOR static Future<Void> monitorStorageUsage(TenantCache* tenantCache) {
|
||||
TraceEvent(SevInfo, "StartingTenantCacheStorageUsageMonitor", tenantCache->id()).log();
|
||||
|
||||
state int refreshInterval = SERVER_KNOBS->TENANT_CACHE_STORAGE_REFRESH_INTERVAL;
|
||||
state int refreshInterval = SERVER_KNOBS->TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL;
|
||||
state double lastTenantListFetchTime = now();
|
||||
|
||||
loop {
|
||||
state double fetchStartTime = now();
|
||||
state std::vector<std::pair<KeyRef, TenantName>> tenantList = tenantCache->getTenantList();
|
||||
state std::vector<TenantName> tenants = tenantCache->getTenantList();
|
||||
state int i;
|
||||
for (i = 0; i < tenantList.size(); i++) {
|
||||
state ReadYourWritesTransaction tr(tenantCache->dbcx(), tenantList[i].second);
|
||||
for (i = 0; i < tenants.size(); i++) {
|
||||
state ReadYourWritesTransaction tr(tenantCache->dbcx(), tenants[i]);
|
||||
loop {
|
||||
try {
|
||||
state int64_t size = wait(tr.getEstimatedRangeSizeBytes(normalKeys));
|
||||
tenantCache->updateStorageUsage(tenantList[i].first, size);
|
||||
tenantCache->tenantStorageMap[tenants[i]].usage = size;
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("TenantCacheGetStorageUsageError", tenantCache->id()).error(e);
|
||||
wait(tr.onError(e));
|
||||
|
@ -149,6 +150,31 @@ public:
|
|||
wait(delay(refreshInterval));
|
||||
}
|
||||
}
|
||||
|
||||
// Background actor that never returns: repeatedly reads every key/value pair in storageQuotaKeys and
// stores the decoded quota into tenantCache->tenantStorageMap, sleeping
// SERVER_KNOBS->TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL between refreshes.
//
// The tenant name is the key with storageQuotaPrefix removed; the value is decoded as an unversioned
// int64_t. Errors from the read are traced and handed to tr.onError(), after which the read is retried.
//
// NOTE(review): the range read is capped at CLIENT_KNOBS->TOO_MANY rows and the result is not checked
// for truncation — confirm that is acceptable.
// NOTE(review): entries are only ever written here, so a tenant whose quota key disappears keeps its
// previously cached quota — confirm that is intended.
ACTOR static Future<Void> monitorStorageQuota(TenantCache* tenantCache) {
	TraceEvent(SevInfo, "StartingTenantCacheStorageQuotaMonitor", tenantCache->id()).log();

	state Transaction tr(tenantCache->dbcx());

	loop {
		// Retry loop for a single refresh; left via `break` once the quotas have been applied.
		loop {
			try {
				state RangeResult currentQuotas = wait(tr.getRange(storageQuotaKeys, CLIENT_KNOBS->TOO_MANY));
				// Bind by const reference so each element is not copied on every iteration.
				for (auto const& kv : currentQuotas) {
					TenantName const tenant = kv.key.removePrefix(storageQuotaPrefix);
					int64_t const quota = BinaryReader::fromStringRef<int64_t>(kv.value, Unversioned());
					tenantCache->tenantStorageMap[tenant].quota = quota;
				}
				// Reset the transaction before it is reused for the next refresh.
				tr.reset();
				break;
			} catch (Error& e) {
				TraceEvent("TenantCacheGetStorageQuotaError", tenantCache->id()).error(e);
				wait(tr.onError(e));
			}
		}
		wait(delay(SERVER_KNOBS->TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL));
	}
}
|
||||
};
|
||||
|
||||
void TenantCache::insert(TenantName& tenantName, TenantMapEntry& tenant) {
|
||||
|
@ -203,21 +229,14 @@ int TenantCache::cleanup() {
|
|||
return tenantsRemoved;
|
||||
}
|
||||
|
||||
std::vector<std::pair<KeyRef, TenantName>> TenantCache::getTenantList() const {
|
||||
std::vector<std::pair<KeyRef, TenantName>> tenants;
|
||||
std::vector<TenantName> TenantCache::getTenantList() const {
|
||||
std::vector<TenantName> tenants;
|
||||
for (const auto& [prefix, entry] : tenantCache) {
|
||||
tenants.push_back({ prefix, entry->name() });
|
||||
tenants.push_back(entry->name());
|
||||
}
|
||||
return tenants;
|
||||
}
|
||||
|
||||
// Records `size` as the storage usage of the tenant stored under `prefix` in tenantCache.
// Does nothing when no entry with that prefix is found.
void TenantCache::updateStorageUsage(KeyRef prefix, int64_t size) {
	auto entry = tenantCache.find(prefix);
	if (entry == tenantCache.end()) {
		return;
	}
	entry->value->updateStorageUsage(size);
}
|
||||
|
||||
std::string TenantCache::desc() const {
|
||||
std::string s("@Generation: ");
|
||||
s += std::to_string(generation) + " ";
|
||||
|
@ -264,6 +283,16 @@ Optional<Reference<TCTenantInfo>> TenantCache::tenantOwning(KeyRef key) const {
|
|||
return it->value;
|
||||
}
|
||||
|
||||
std::vector<TenantName> TenantCache::getTenantsOverQuota() const {
|
||||
std::vector<TenantName> tenants;
|
||||
for (const auto& [tenant, storage] : tenantStorageMap) {
|
||||
if (storage.usage > storage.quota) {
|
||||
tenants.push_back(tenant);
|
||||
}
|
||||
}
|
||||
return tenants;
|
||||
}
|
||||
|
||||
Future<Void> TenantCache::monitorTenantMap() {
|
||||
return TenantCacheImpl::monitorTenantMap(this);
|
||||
}
|
||||
|
@ -272,6 +301,10 @@ Future<Void> TenantCache::monitorStorageUsage() {
|
|||
return TenantCacheImpl::monitorStorageUsage(this);
|
||||
}
|
||||
|
||||
// Public entry point: starts TenantCacheImpl::monitorStorageQuota for this cache and returns its future.
Future<Void> TenantCache::monitorStorageQuota() {
	Future<Void> quotaMonitor = TenantCacheImpl::monitorStorageQuota(this);
	return quotaMonitor;
}
|
||||
|
||||
class TenantCacheUnitTest {
|
||||
public:
|
||||
ACTOR static Future<Void> InsertAndTestPresence() {
|
||||
|
|
|
@ -140,9 +140,27 @@ private:
|
|||
Future<Void> collection;
|
||||
};
|
||||
|
||||
// Defines granule info that interests full restore
|
||||
struct BlobGranuleRestoreVersion {
|
||||
// Two constructors required by VectorRef
|
||||
BlobGranuleRestoreVersion() {}
|
||||
BlobGranuleRestoreVersion(Arena& a, const BlobGranuleRestoreVersion& copyFrom)
|
||||
: granuleID(copyFrom.granuleID), keyRange(a, copyFrom.keyRange), version(copyFrom.version),
|
||||
sizeInBytes(copyFrom.sizeInBytes) {}
|
||||
|
||||
UID granuleID;
|
||||
KeyRangeRef keyRange;
|
||||
Version version;
|
||||
int64_t sizeInBytes;
|
||||
};
|
||||
|
||||
// Defines a vector of BlobGranuleRestoreVersion
|
||||
typedef Standalone<VectorRef<BlobGranuleRestoreVersion>> BlobGranuleRestoreVersionVector;
|
||||
|
||||
ACTOR Future<Void> dumpManifest(Database db, Reference<BlobConnectionProvider> blobConn, int64_t epoch, int64_t seqNo);
|
||||
ACTOR Future<Void> loadManifest(Database db, Reference<BlobConnectionProvider> blobConn);
|
||||
ACTOR Future<Void> printRestoreSummary(Database db, Reference<BlobConnectionProvider> blobConn);
|
||||
ACTOR Future<BlobGranuleRestoreVersionVector> listBlobGranules(Database db, Reference<BlobConnectionProvider> blobConn);
|
||||
inline bool isFullRestoreMode() {
|
||||
return SERVER_KNOBS->BLOB_FULL_RESTORE_MODE;
|
||||
};
|
||||
|
|
|
@ -30,23 +30,25 @@
|
|||
struct BlobMigratorInterface {
|
||||
constexpr static FileIdentifier file_identifier = 869199;
|
||||
RequestStream<struct HaltBlobMigratorRequest> haltBlobMigrator;
|
||||
RequestStream<ReplyPromise<Void>> waitFailure;
|
||||
LocalityData locality;
|
||||
UID uniqueID;
|
||||
StorageServerInterface ssi;
|
||||
|
||||
BlobMigratorInterface() {}
|
||||
BlobMigratorInterface(const struct LocalityData& l, UID id) : uniqueID(id), locality(l) {}
|
||||
BlobMigratorInterface(const struct LocalityData& l, UID id) : uniqueID(id), locality(l) {
|
||||
ssi.locality = l;
|
||||
ssi.uniqueID = id;
|
||||
}
|
||||
|
||||
void initEndpoints() {}
|
||||
void initEndpoints() { ssi.initEndpoints(); }
|
||||
UID id() const { return uniqueID; }
|
||||
NetworkAddress address() const { return waitFailure.getEndpoint().getPrimaryAddress(); }
|
||||
NetworkAddress address() const { return haltBlobMigrator.getEndpoint().getPrimaryAddress(); }
|
||||
bool operator==(const BlobMigratorInterface& r) const { return id() == r.id(); }
|
||||
bool operator!=(const BlobMigratorInterface& r) const { return !(*this == r); }
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar) {
|
||||
// StorageServerInterface::serialize(ar);
|
||||
serializer(ar, waitFailure, haltBlobMigrator, locality, uniqueID);
|
||||
serializer(ar, locality, uniqueID, haltBlobMigrator);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -289,11 +289,10 @@ struct ClusterRecoveryData : NonCopyable, ReferenceCounted<ClusterRecoveryData>
|
|||
getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_DURATION_EVENT_NAME));
|
||||
clusterRecoveryAvailableEventHolder = makeReference<EventCacheHolder>(
|
||||
getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_AVAILABLE_EVENT_NAME));
|
||||
logger = traceCounters(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_METRICS_EVENT_NAME),
|
||||
dbgid,
|
||||
SERVER_KNOBS->WORKER_LOGGING_INTERVAL,
|
||||
&cc,
|
||||
getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_METRICS_EVENT_NAME));
|
||||
logger = cc.traceCounters(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_METRICS_EVENT_NAME),
|
||||
dbgid,
|
||||
SERVER_KNOBS->WORKER_LOGGING_INTERVAL,
|
||||
getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_METRICS_EVENT_NAME));
|
||||
if (forceRecovery && !controllerData->clusterControllerDcId.present()) {
|
||||
TraceEvent(SevError, "ForcedRecoveryRequiresDcID").log();
|
||||
forceRecovery = false;
|
||||
|
|
|
@ -117,6 +117,7 @@ public:
|
|||
|
||||
virtual Future<Void> moveKeys(const MoveKeysParams& params) = 0;
|
||||
|
||||
// metrics.second is the number of key-ranges (i.e., shards) in the 'keys' key-range
|
||||
virtual Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(KeyRange const& keys,
|
||||
StorageMetrics const& min,
|
||||
StorageMetrics const& max,
|
||||
|
|
|
@ -476,6 +476,8 @@ struct ShardSizeBounds {
|
|||
bool operator==(ShardSizeBounds const& rhs) const {
|
||||
return max == rhs.max && min == rhs.min && permittedError == rhs.permittedError;
|
||||
}
|
||||
|
||||
static ShardSizeBounds shardSizeBoundsBeforeTrack();
|
||||
};
|
||||
|
||||
// Gets the permitted size and IO bounds for a shard
|
||||
|
@ -484,10 +486,6 @@ ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize);
|
|||
// Determines the maximum shard size based on the size of the database
|
||||
int64_t getMaxShardSize(double dbSizeEstimate);
|
||||
|
||||
// Holds quotaMap, a std::map keyed by Key with one uint64_t value per entry.
// NOTE(review): presumably the values are per-key storage quotas, going by the names — confirm against users.
struct StorageQuotaInfo {
|
||||
std::map<Key, uint64_t> quotaMap;
|
||||
};
|
||||
|
||||
#ifndef __INTEL_COMPILER
|
||||
#pragma endregion
|
||||
#endif
|
||||
|
|
|
@ -46,6 +46,7 @@ class GrvProxyTransactionTagThrottler {
|
|||
: req(req), startTime(now()), sequenceNumber(++lastSequenceNumber) {}
|
||||
|
||||
void updateProxyTagThrottledDuration();
|
||||
bool isMaxThrottled() const;
|
||||
};
|
||||
|
||||
struct TagQueue {
|
||||
|
@ -56,6 +57,8 @@ class GrvProxyTransactionTagThrottler {
|
|||
explicit TagQueue(double rate) : rateInfo(rate) {}
|
||||
|
||||
void setRate(double rate);
|
||||
bool isMaxThrottled() const;
|
||||
void rejectRequests();
|
||||
};
|
||||
|
||||
// Track the budgets for each tag
|
||||
|
@ -69,8 +72,8 @@ public:
|
|||
// If a request is ready to be executed, it is sent to the deque
|
||||
// corresponding to its priority. If not, the request remains queued.
|
||||
void releaseTransactions(double elapsed,
|
||||
SpannedDeque<GetReadVersionRequest>& outBatchPriority,
|
||||
SpannedDeque<GetReadVersionRequest>& outDefaultPriority);
|
||||
Deque<GetReadVersionRequest>& outBatchPriority,
|
||||
Deque<GetReadVersionRequest>& outDefaultPriority);
|
||||
|
||||
void addRequest(GetReadVersionRequest const&);
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "fdbserver/IClosable.h"
|
||||
#include "fdbserver/IPageEncryptionKeyProvider.actor.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/StorageMetrics.actor.h"
|
||||
|
||||
struct CheckpointRequest {
|
||||
const Version version; // The FDB version at which the checkpoint is created.
|
||||
|
|
|
@ -21,10 +21,11 @@
|
|||
#ifndef FOUNDATIONDB_MOCKGLOBALSTATE_H
|
||||
#define FOUNDATIONDB_MOCKGLOBALSTATE_H
|
||||
|
||||
#include "StorageMetrics.h"
|
||||
#include "StorageMetrics.actor.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/DatabaseConfiguration.h"
|
||||
#include "fdbclient/KeyLocationService.h"
|
||||
#include "SimulatedCluster.h"
|
||||
#include "ShardsAffectedByTeamFailure.h"
|
||||
|
||||
|
@ -51,9 +52,11 @@ inline bool isStatusTransitionValid(MockShardStatus from, MockShardStatus to) {
|
|||
return false;
|
||||
}
|
||||
|
||||
class MockStorageServer {
|
||||
class MockStorageServer : public IStorageMetricsService {
|
||||
friend struct MockGlobalStateTester;
|
||||
|
||||
ActorCollection actors;
|
||||
|
||||
public:
|
||||
struct ShardInfo {
|
||||
MockShardStatus status;
|
||||
|
@ -73,8 +76,6 @@ public:
|
|||
// size() and nthRange() would use the metrics as index instead
|
||||
KeyRangeMap<ShardInfo> serverKeys;
|
||||
|
||||
// sampled metrics
|
||||
StorageServerMetrics metrics;
|
||||
CoalescedKeyRangeMap<bool, int64_t, KeyBytesMetric<int64_t>> byteSampleClears;
|
||||
|
||||
StorageServerInterface ssi; // serve RPC requests
|
||||
|
@ -103,6 +104,35 @@ public:
|
|||
|
||||
uint64_t sumRangeSize(KeyRangeRef range) const;
|
||||
|
||||
void addActor(Future<Void> future) override;
|
||||
|
||||
void getSplitPoints(SplitRangeRequest const& req) override;
|
||||
|
||||
Future<Void> waitMetricsTenantAware(const WaitMetricsRequest& req) override;
|
||||
|
||||
void getStorageMetrics(const GetStorageMetricsRequest& req) override;
|
||||
|
||||
template <class Reply>
|
||||
static constexpr bool isLoadBalancedReply = std::is_base_of_v<LoadBalancedReply, Reply>;
|
||||
|
||||
template <class Reply>
|
||||
typename std::enable_if_t<isLoadBalancedReply<Reply>, void> sendErrorWithPenalty(const ReplyPromise<Reply>& promise,
|
||||
const Error& err,
|
||||
double penalty) {
|
||||
Reply reply;
|
||||
reply.error = err;
|
||||
reply.penalty = penalty;
|
||||
promise.send(reply);
|
||||
}
|
||||
|
||||
template <class Reply>
|
||||
typename std::enable_if_t<!isLoadBalancedReply<Reply>, void>
|
||||
sendErrorWithPenalty(const ReplyPromise<Reply>& promise, const Error& err, double) {
|
||||
promise.sendError(err);
|
||||
}
|
||||
|
||||
Future<Void> run();
|
||||
|
||||
protected:
|
||||
void threeWayShardSplitting(KeyRangeRef outerRange,
|
||||
KeyRangeRef innerRange,
|
||||
|
@ -112,8 +142,13 @@ protected:
|
|||
void twoWayShardSplitting(KeyRangeRef range, KeyRef splitPoint, uint64_t rangeSize, bool restrictSize);
|
||||
};
|
||||
|
||||
class MockGlobalState {
|
||||
class MockGlobalStateImpl;
|
||||
|
||||
class MockGlobalState : public IKeyLocationService {
|
||||
friend struct MockGlobalStateTester;
|
||||
friend class MockGlobalStateImpl;
|
||||
|
||||
std::vector<StorageServerInterface> extractStorageServerInterfaces(const std::vector<UID>& ids) const;
|
||||
|
||||
public:
|
||||
typedef ShardsAffectedByTeamFailure::Team Team;
|
||||
|
@ -162,7 +197,37 @@ public:
|
|||
* * mgs.shardMapping doesn’t have any information about X
|
||||
* * mgs.allServer[X] exists
|
||||
*/
|
||||
bool allShardRemovedFromServer(const UID& serverId);
|
||||
bool allShardsRemovedFromServer(const UID& serverId);
|
||||
|
||||
// SOMEDAY: NativeAPI::waitStorageMetrics should share the code in the future, this is a simpler version of it
|
||||
Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(KeyRange const& keys,
|
||||
StorageMetrics const& min,
|
||||
StorageMetrics const& max,
|
||||
StorageMetrics const& permittedError,
|
||||
int shardLimit,
|
||||
int expectedShardCount);
|
||||
|
||||
Future<Standalone<VectorRef<KeyRef>>> splitStorageMetrics(const KeyRange& keys,
|
||||
const StorageMetrics& limit,
|
||||
const StorageMetrics& estimated,
|
||||
const Optional<int>& minSplitBytes);
|
||||
|
||||
Future<KeyRangeLocationInfo> getKeyLocation(TenantInfo tenant,
|
||||
Key key,
|
||||
SpanContext spanContext,
|
||||
Optional<UID> debugID,
|
||||
UseProvisionalProxies useProvisionalProxies,
|
||||
Reverse isBackward,
|
||||
Version version) override;
|
||||
|
||||
Future<std::vector<KeyRangeLocationInfo>> getKeyRangeLocations(TenantInfo tenant,
|
||||
KeyRange keys,
|
||||
int limit,
|
||||
Reverse reverse,
|
||||
SpanContext spanContext,
|
||||
Optional<UID> debugID,
|
||||
UseProvisionalProxies useProvisionalProxies,
|
||||
Version version) override;
|
||||
};
|
||||
|
||||
#endif // FOUNDATIONDB_MOCKGLOBALSTATE_H
|
||||
|
|
|
@ -156,7 +156,7 @@ struct ProxyStats {
|
|||
specialCounter(cc, "NumTenants", [pTenantMap]() { return pTenantMap ? pTenantMap->size() : 0; });
|
||||
specialCounter(cc, "MaxCompute", [this]() { return this->getAndResetMaxCompute(); });
|
||||
specialCounter(cc, "MinCompute", [this]() { return this->getAndResetMinCompute(); });
|
||||
logger = traceCounters("ProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ProxyMetrics");
|
||||
logger = cc.traceCounters("ProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "ProxyMetrics");
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -284,11 +284,11 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
|
|||
: vbState(ApplierVersionBatchState::NOT_INIT), receiveMutationReqs(0), receivedBytes(0), appliedBytes(0),
|
||||
targetWriteRateMB(SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS),
|
||||
totalBytesToWrite(-1), applyingDataBytes(0), counters(this, nodeID, batchIndex) {
|
||||
pollMetrics = traceCounters(format("FastRestoreApplierMetrics%d", batchIndex),
|
||||
nodeID,
|
||||
SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY,
|
||||
&counters.cc,
|
||||
nodeID.toString() + "/RestoreApplierMetrics/" + std::to_string(batchIndex));
|
||||
pollMetrics =
|
||||
counters.cc.traceCounters(format("FastRestoreApplierMetrics%d", batchIndex),
|
||||
nodeID,
|
||||
SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY,
|
||||
nodeID.toString() + "/RestoreApplierMetrics/" + std::to_string(batchIndex));
|
||||
TraceEvent("FastRestoreApplierMetricsCreated").detail("Node", nodeID);
|
||||
}
|
||||
~ApplierBatchData() {
|
||||
|
|
|
@ -93,11 +93,11 @@ struct LoaderBatchData : public ReferenceCounted<LoaderBatchData> {
|
|||
|
||||
explicit LoaderBatchData(UID nodeID, int batchIndex)
|
||||
: vbState(LoaderVersionBatchState::NOT_INIT), loadFileReqs(0), counters(this, nodeID, batchIndex) {
|
||||
pollMetrics = traceCounters(format("FastRestoreLoaderMetrics%d", batchIndex),
|
||||
nodeID,
|
||||
SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY,
|
||||
&counters.cc,
|
||||
nodeID.toString() + "/RestoreLoaderMetrics/" + std::to_string(batchIndex));
|
||||
pollMetrics =
|
||||
counters.cc.traceCounters(format("FastRestoreLoaderMetrics%d", batchIndex),
|
||||
nodeID,
|
||||
SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY,
|
||||
nodeID.toString() + "/RestoreLoaderMetrics/" + std::to_string(batchIndex));
|
||||
TraceEvent("FastRestoreLoaderMetricsCreated").detail("Node", nodeID);
|
||||
}
|
||||
|
||||
|
|
|
@ -80,8 +80,12 @@ public:
|
|||
bool hasShards(Team team) const;
|
||||
|
||||
// The first element of the pair is either the source for non-moving shards or the destination team for in-flight
|
||||
// shards The second element of the pair is all previous sources for in-flight shards
|
||||
std::pair<std::vector<Team>, std::vector<Team>> getTeamsFor(KeyRangeRef keys);
|
||||
// shards. The second element of the pair is all previous sources for in-flight shards. This function only returns
|
||||
// the teams for the first shard in [keys.begin, keys.end)
|
||||
std::pair<std::vector<Team>, std::vector<Team>> getTeamsForFirstShard(KeyRangeRef keys);
|
||||
|
||||
std::pair<std::vector<Team>, std::vector<Team>> getTeamsFor(KeyRef key);
|
||||
|
||||
// Shard boundaries are modified in defineShard and the content of what servers correspond to each shard is a copy
|
||||
// or union of the shards already there
|
||||
void defineShard(KeyRangeRef keys);
|
||||
|
@ -124,6 +128,7 @@ private:
|
|||
public:
|
||||
// return the iterator that traverses all ranges
|
||||
auto getAllRanges() const -> decltype(shard_teams)::ConstRanges;
|
||||
auto intersectingRanges(KeyRangeRef keyRange) const -> decltype(shard_teams)::ConstRanges;
|
||||
// get total shards count
|
||||
size_t getNumberOfShards() const;
|
||||
void removeFailedServerForRange(KeyRangeRef keys, const UID& serverID);
|
||||
|
|
|
@ -19,13 +19,18 @@
|
|||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_STORAGEMETRICS_G_H)
|
||||
#define FDBSERVER_STORAGEMETRICS_G_H
|
||||
#include "fdbserver/StorageMetrics.actor.g.h"
|
||||
#elif !defined(FDBSERVER_STORAGEMETRICS_H)
|
||||
#define FDBSERVER_STORAGEMETRICS_H
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "flow/UnitTest.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/actorcompiler.h"
|
||||
|
||||
const StringRef STORAGESERVER_HISTOGRAM_GROUP = "StorageServer"_sr;
|
||||
const StringRef FETCH_KEYS_LATENCY_HISTOGRAM = "FetchKeysLatency"_sr;
|
||||
|
@ -152,3 +157,76 @@ struct ByteSampleInfo {
|
|||
// Determines whether a key-value pair should be included in a byte sample
|
||||
// Also returns size information about the sample
|
||||
ByteSampleInfo isKeyValueInSample(KeyValueRef keyValue);
|
||||
|
||||
class IStorageMetricsService {
|
||||
public:
|
||||
StorageServerMetrics metrics;
|
||||
|
||||
// penalty used by loadBalance() to balance requests among service instances
|
||||
virtual double getPenalty() const { return 1; }
|
||||
|
||||
virtual bool isReadable(KeyRangeRef const& keys) const { return true; }
|
||||
|
||||
virtual void addActor(Future<Void> future) = 0;
|
||||
|
||||
virtual void getSplitPoints(SplitRangeRequest const& req) = 0;
|
||||
|
||||
virtual Future<Void> waitMetricsTenantAware(const WaitMetricsRequest& req) = 0;
|
||||
|
||||
virtual void getStorageMetrics(const GetStorageMetricsRequest& req) = 0;
|
||||
|
||||
// NOTE: also need to have this function but template can't be a virtual so...
|
||||
// template <class Reply>
|
||||
// void sendErrorWithPenalty(const ReplyPromise<Reply>& promise, const Error& err, double penalty);
|
||||
};
|
||||
|
||||
// Actor that serves the metrics-related request streams of `ssi` on behalf of `self`, forever.
//
// Streams handled: waitMetrics, splitMetrics, getStorageMetrics, getReadHotRanges and
// getRangeSplitPoints. A request whose key range `self` reports as not readable is answered
// immediately with wrong_shard_server(). A waitMetrics request carrying tenant info skips that check,
// and getStorageMetrics requests are never checked. In addition, self->metrics.poll() runs once
// right away and then every SERVER_KNOBS->STORAGE_SERVER_POLL_METRICS_DELAY.
//
// ServiceType must provide isReadable, getPenalty, sendErrorWithPenalty, addActor,
// waitMetricsTenantAware, getStorageMetrics, getSplitPoints and a `metrics` member.
ACTOR template <class ServiceType>
Future<Void> serveStorageMetricsRequests(ServiceType* self, StorageServerInterface ssi) {
	// Starts out ready so that the first poll is not delayed.
	state Future<Void> pollTimer = Void();
	loop {
		choose {
			when(state WaitMetricsRequest req = waitNext(ssi.waitMetrics.getFuture())) {
				if (req.tenantInfo.present() || self->isReadable(req.keys)) {
					self->addActor(self->waitMetricsTenantAware(req));
				} else {
					CODE_PROBE(true, "waitMetrics immediate wrong_shard_server()");
					self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
				}
			}
			when(SplitMetricsRequest req = waitNext(ssi.splitMetrics.getFuture())) {
				if (self->isReadable(req.keys)) {
					self->metrics.splitMetrics(req);
				} else {
					CODE_PROBE(true, "splitMetrics immediate wrong_shard_server()");
					self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
				}
			}
			when(GetStorageMetricsRequest req = waitNext(ssi.getStorageMetrics.getFuture())) {
				self->getStorageMetrics(req);
			}
			when(ReadHotSubRangeRequest req = waitNext(ssi.getReadHotRanges.getFuture())) {
				if (self->isReadable(req.keys)) {
					self->metrics.getReadHotRanges(req);
				} else {
					CODE_PROBE(true, "readHotSubRanges immediate wrong_shard_server()", probe::decoration::rare);
					self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
				}
			}
			when(SplitRangeRequest req = waitNext(ssi.getRangeSplitPoints.getFuture())) {
				if (self->isReadable(req.keys)) {
					self->getSplitPoints(req);
				} else {
					CODE_PROBE(true, "getSplitPoints immediate wrong_shard_server()");
					self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
				}
			}
			when(wait(pollTimer)) {
				self->metrics.poll();
				pollTimer = delay(SERVER_KNOBS->STORAGE_SERVER_POLL_METRICS_DELAY);
			}
		}
	}
}
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif // FDBSERVER_STORAGEMETRICS_H
|
|
@ -268,5 +268,4 @@ public:
|
|||
void removeTeam(TCTeamInfo team);
|
||||
void updateCacheGeneration(int64_t generation) { m_cacheGeneration = generation; }
|
||||
int64_t cacheGeneration() const { return m_cacheGeneration; }
|
||||
void updateStorageUsage(int64_t size) { m_tenantInfo.storageUsage = size; }
|
||||
};
|
||||
|
|
|
@ -32,6 +32,12 @@
|
|||
|
||||
typedef Map<KeyRef, Reference<TCTenantInfo>> TenantMapByPrefix;
|
||||
|
||||
// Storage quota and usage values tracked together.
struct Storage {
	// Defaults to the largest int64_t value.
	int64_t quota{ std::numeric_limits<int64_t>::max() };
	// Defaults to zero.
	int64_t usage{ 0 };
};
|
||||
typedef std::unordered_map<TenantName, Storage> TenantStorageMap;
|
||||
|
||||
struct TenantCacheTenantCreated {
|
||||
KeyRange keys;
|
||||
Promise<bool> reply;
|
||||
|
@ -50,6 +56,9 @@ private:
|
|||
uint64_t generation;
|
||||
TenantMapByPrefix tenantCache;
|
||||
|
||||
// Map from tenant names to storage quota and usage
|
||||
TenantStorageMap tenantStorageMap;
|
||||
|
||||
// mark the start of a new sweep of the tenant cache
|
||||
void startRefresh();
|
||||
|
||||
|
@ -62,11 +71,8 @@ private:
|
|||
// return count of tenants that were found to be stale and removed from the cache
|
||||
int cleanup();
|
||||
|
||||
// return the mapping from prefix -> tenant name for all tenants stored in the cache
|
||||
std::vector<std::pair<KeyRef, TenantName>> getTenantList() const;
|
||||
|
||||
// update the size for a tenant; do nothing if the tenant doesn't exist in the map
|
||||
void updateStorageUsage(KeyRef prefix, int64_t size);
|
||||
// return the names of all tenants stored in the cache
|
||||
std::vector<TenantName> getTenantList() const;
|
||||
|
||||
UID id() const { return distributorID; }
|
||||
|
||||
|
@ -85,9 +91,14 @@ public:
|
|||
|
||||
Future<Void> monitorStorageUsage();
|
||||
|
||||
Future<Void> monitorStorageQuota();
|
||||
|
||||
std::string desc() const;
|
||||
|
||||
bool isTenantKey(KeyRef key) const;
|
||||
|
||||
Optional<Reference<TCTenantInfo>> tenantOwning(KeyRef key) const;
|
||||
|
||||
// Get the list of tenants where the storage bytes currently used is greater than the quota allocated
|
||||
std::vector<TenantName> getTenantsOverQuota() const;
|
||||
};
|
||||
|
|
|
@ -114,7 +114,7 @@ struct MasterData : NonCopyable, ReferenceCounted<MasterData> {
|
|||
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
|
||||
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
|
||||
addActor(addActor) {
|
||||
logger = traceCounters("MasterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "MasterMetrics");
|
||||
logger = cc.traceCounters("MasterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "MasterMetrics");
|
||||
if (forceRecovery && !myInterface.locality.dcId().present()) {
|
||||
TraceEvent(SevError, "ForcedRecoveryRequiresDcID").log();
|
||||
forceRecovery = false;
|
||||
|
|
|
@ -81,11 +81,12 @@
|
|||
#include "fdbserver/ServerCheckpoint.actor.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
#include "fdbserver/SpanContextMessage.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/StorageMetrics.actor.h"
|
||||
#include "fdbserver/TLogInterface.h"
|
||||
#include "fdbserver/TransactionTagCounter.h"
|
||||
#include "fdbserver/WaitFailure.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "fdbserver/BlobGranuleServerCommon.actor.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/Error.h"
|
||||
|
@ -641,7 +642,7 @@ struct BusiestWriteTagContext {
|
|||
busiestWriteTagEventHolder(makeReference<EventCacheHolder>(busiestWriteTagTrackingKey)), lastUpdateTime(-1) {}
|
||||
};
|
||||
|
||||
struct StorageServer {
|
||||
struct StorageServer : public IStorageMetricsService {
|
||||
typedef VersionedMap<KeyRef, ValueOrClearToRef> VersionedData;
|
||||
|
||||
private:
|
||||
|
@ -807,8 +808,8 @@ public:
|
|||
VersionedData const& data() const { return versionedData; }
|
||||
VersionedData& mutableData() { return versionedData; }
|
||||
|
||||
double old_rate = 1.0;
|
||||
double currentRate() {
|
||||
mutable double old_rate = 1.0;
|
||||
double currentRate() const {
|
||||
auto versionLag = version.get() - durableVersion.get();
|
||||
double res;
|
||||
if (versionLag >= SERVER_KNOBS->STORAGE_DURABILITY_LAG_HARD_MAX) {
|
||||
|
@ -988,7 +989,6 @@ public:
|
|||
Database cx;
|
||||
ActorCollection actors;
|
||||
|
||||
StorageServerMetrics metrics;
|
||||
CoalescedKeyRangeMap<bool, int64_t, KeyBytesMetric<int64_t>> byteSampleClears;
|
||||
AsyncVar<bool> byteSampleClearsTooLarge;
|
||||
Future<Void> byteSampleRecovery;
|
||||
|
@ -1308,10 +1308,10 @@ public:
|
|||
storageServerSourceTLogIDEventHolder(
|
||||
makeReference<EventCacheHolder>(ssi.id().toString() + "/StorageServerSourceTLogID")) {
|
||||
|
||||
version.initMetric("StorageServer.Version"_sr, counters.cc.id);
|
||||
oldestVersion.initMetric("StorageServer.OldestVersion"_sr, counters.cc.id);
|
||||
durableVersion.initMetric("StorageServer.DurableVersion"_sr, counters.cc.id);
|
||||
desiredOldestVersion.initMetric("StorageServer.DesiredOldestVersion"_sr, counters.cc.id);
|
||||
version.initMetric("StorageServer.Version"_sr, counters.cc.getId());
|
||||
oldestVersion.initMetric("StorageServer.OldestVersion"_sr, counters.cc.getId());
|
||||
durableVersion.initMetric("StorageServer.DurableVersion"_sr, counters.cc.getId());
|
||||
desiredOldestVersion.initMetric("StorageServer.DesiredOldestVersion"_sr, counters.cc.getId());
|
||||
|
||||
newestAvailableVersion.insert(allKeys, invalidVersion);
|
||||
newestDirtyVersion.insert(allKeys, invalidVersion);
|
||||
|
@ -1380,7 +1380,7 @@ public:
|
|||
// This is the maximum version that might be read from storage (the minimum version is durableVersion)
|
||||
Version storageVersion() const { return oldestVersion.get(); }
|
||||
|
||||
bool isReadable(KeyRangeRef const& keys) {
|
||||
bool isReadable(KeyRangeRef const& keys) const override {
|
||||
auto sh = shards.intersectingRanges(keys);
|
||||
for (auto i = sh.begin(); i != sh.end(); ++i)
|
||||
if (!i->value()->isReadable())
|
||||
|
@ -1406,10 +1406,10 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
Counter::Value queueSize() { return counters.bytesInput.getValue() - counters.bytesDurable.getValue(); }
|
||||
Counter::Value queueSize() const { return counters.bytesInput.getValue() - counters.bytesDurable.getValue(); }
|
||||
|
||||
// penalty used by loadBalance() to balance requests among SSes. We prefer SS with less write queue size.
|
||||
double getPenalty() {
|
||||
double getPenalty() const override {
|
||||
return std::max(std::max(1.0,
|
||||
(queueSize() - (SERVER_KNOBS->TARGET_BYTES_PER_STORAGE_SERVER -
|
||||
2.0 * SERVER_KNOBS->SPRING_BYTES_STORAGE_SERVER)) /
|
||||
|
@ -1503,7 +1503,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void getSplitPoints(SplitRangeRequest const& req) {
|
||||
void getSplitPoints(SplitRangeRequest const& req) override {
|
||||
try {
|
||||
Optional<TenantMapEntry> entry = getTenantEntry(version.get(), req.tenantInfo);
|
||||
metrics.getSplitPoints(req, entry.map<Key>([](TenantMapEntry e) { return e.prefix; }));
|
||||
|
@ -1533,6 +1533,15 @@ public:
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Future<Void> waitMetricsTenantAware(const WaitMetricsRequest& req) override;
|
||||
|
||||
// Override that adds `future` to this server's `actors` collection.
void addActor(Future<Void> future) override { actors.add(future); }
|
||||
|
||||
// Override that handles a GetStorageMetricsRequest: reads the current StorageBytes from `storage` and passes
// them to metrics.getStorageMetrics() together with the bytesInput rate, versionLag and lastUpdate.
void getStorageMetrics(const GetStorageMetricsRequest& req) override {
|
||||
// Snapshot of the storage byte counters taken at the time of the request.
StorageBytes sb = storage.getStorageBytes();
|
||||
metrics.getStorageMetrics(req, sb, counters.bytesInput.getRate(), versionLag, lastUpdate);
|
||||
}
|
||||
};
|
||||
|
||||
const StringRef StorageServer::CurrentRunningFetchKeys::emptyString = ""_sr;
|
||||
|
@ -5976,27 +5985,26 @@ ACTOR Future<Void> tryGetRangeFromBlob(PromiseStream<RangeResult> results,
|
|||
Reference<BlobConnectionProvider> blobConn) {
|
||||
ASSERT(blobConn.isValid());
|
||||
try {
|
||||
|
||||
state Standalone<VectorRef<BlobGranuleChunkRef>> chunks = wait(tryReadBlobGranules(tr, keys, fetchVersion));
|
||||
|
||||
if (chunks.size() == 0) {
|
||||
throw blob_granule_transaction_too_old(); // no data on blob
|
||||
}
|
||||
|
||||
if (!isRangeFullyCovered(keys, chunks)) {
|
||||
throw blob_granule_transaction_too_old();
|
||||
}
|
||||
|
||||
for (const BlobGranuleChunkRef& chunk : chunks) {
|
||||
state KeyRangeRef chunkRange = chunk.keyRange;
|
||||
state RangeResult rows = wait(readBlobGranule(chunk, keys, 0, fetchVersion, blobConn));
|
||||
state int i;
|
||||
for (i = 0; i < chunks.size(); ++i) {
|
||||
state KeyRangeRef chunkRange = chunks[i].keyRange;
|
||||
state RangeResult rows = wait(readBlobGranule(chunks[i], keys, 0, fetchVersion, blobConn));
|
||||
TraceEvent("ReadBlobData")
|
||||
.detail("Rows", rows.size())
|
||||
.detail("ChunkRange", chunkRange.toString())
|
||||
.detail("Keys", keys.toString());
|
||||
|
||||
if (rows.size() == 0) {
|
||||
rows.readThrough = KeyRef(rows.arena(), chunkRange.end);
|
||||
rows.readThrough = KeyRef(rows.arena(), std::min(chunkRange.end, keys.end));
|
||||
}
|
||||
if (i == chunks.size() - 1) {
|
||||
rows.readThrough = KeyRef(rows.arena(), keys.end);
|
||||
}
|
||||
results.send(rows);
|
||||
}
|
||||
|
@ -6010,7 +6018,7 @@ ACTOR Future<Void> tryGetRangeFromBlob(PromiseStream<RangeResult> results,
|
|||
tr->reset();
|
||||
tr->setVersion(fetchVersion);
|
||||
tr->trState->taskID = TaskPriority::FetchKeys;
|
||||
wait(tryGetRange(results, tr, keys)); // fail back to storage server
|
||||
throw;
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
@ -6798,8 +6806,10 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
|
|||
// We must also ensure we have fetched all change feed metadata BEFORE changing the phase to fetching to ensure
|
||||
// change feed mutations get applied correctly
|
||||
state std::vector<Key> changeFeedsToFetch;
|
||||
std::vector<Key> _cfToFetch = wait(fetchCFMetadata);
|
||||
changeFeedsToFetch = _cfToFetch;
|
||||
if (!isFullRestoreMode()) {
|
||||
std::vector<Key> _cfToFetch = wait(fetchCFMetadata);
|
||||
changeFeedsToFetch = _cfToFetch;
|
||||
}
|
||||
wait(data->durableVersionLock.take());
|
||||
|
||||
shard->phase = AddingShard::Fetching;
|
||||
|
@ -10166,7 +10176,7 @@ Future<Void> StorageServerMetrics::waitMetrics(WaitMetricsRequest req, Future<Vo
|
|||
#pragma region Core
|
||||
#endif
|
||||
|
||||
ACTOR Future<Void> waitMetricsTenantAware(StorageServer* self, WaitMetricsRequest req) {
|
||||
ACTOR Future<Void> waitMetricsTenantAware_internal(StorageServer* self, WaitMetricsRequest req) {
|
||||
if (req.tenantInfo.present() && req.tenantInfo.get().tenantId != TenantInfo::INVALID_TENANT) {
|
||||
wait(success(waitForVersionNoTooOld(self, latestVersion)));
|
||||
Optional<TenantMapEntry> entry = self->getTenantEntry(latestVersion, req.tenantInfo.get());
|
||||
|
@ -10184,85 +10194,45 @@ ACTOR Future<Void> waitMetricsTenantAware(StorageServer* self, WaitMetricsReques
|
|||
return Void();
|
||||
}
|
||||
|
||||
// Member-function form of the tenant-aware waitMetrics handler. It only forwards to the
// waitMetricsTenantAware_internal actor, passing this server and the request, and returns that actor's future.
Future<Void> StorageServer::waitMetricsTenantAware(const WaitMetricsRequest& req) {
|
||||
return waitMetricsTenantAware_internal(this, req);
|
||||
}
|
||||
|
||||
ACTOR Future<Void> metricsCore(StorageServer* self, StorageServerInterface ssi) {
|
||||
state Future<Void> doPollMetrics = Void();
|
||||
|
||||
wait(self->byteSampleRecovery);
|
||||
TraceEvent("StorageServerRestoreDurableState", self->thisServerID).detail("RestoredBytes", self->bytesRestored);
|
||||
|
||||
// Logs all counters in `counters.cc` and reset the interval.
|
||||
self->actors.add(traceCounters("StorageMetrics",
|
||||
self->thisServerID,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&self->counters.cc,
|
||||
self->thisServerID.toString() + "/StorageMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
te.detail("StorageEngine", self->storage.getKeyValueStoreType().toString());
|
||||
te.detail("Tag", self->tag.toString());
|
||||
StorageBytes sb = self->storage.getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sb.used);
|
||||
te.detail("KvstoreBytesFree", sb.free);
|
||||
te.detail("KvstoreBytesAvailable", sb.available);
|
||||
te.detail("KvstoreBytesTotal", sb.total);
|
||||
te.detail("KvstoreBytesTemp", sb.temp);
|
||||
if (self->isTss()) {
|
||||
te.detail("TSSPairID", self->tssPairID);
|
||||
te.detail("TSSJointID",
|
||||
UID(self->thisServerID.first() ^ self->tssPairID.get().first(),
|
||||
self->thisServerID.second() ^ self->tssPairID.get().second()));
|
||||
} else if (self->isSSWithTSSPair()) {
|
||||
te.detail("SSPairID", self->ssPairID);
|
||||
te.detail("TSSJointID",
|
||||
UID(self->thisServerID.first() ^ self->ssPairID.get().first(),
|
||||
self->thisServerID.second() ^ self->ssPairID.get().second()));
|
||||
}
|
||||
}));
|
||||
self->actors.add(self->counters.cc.traceCounters(
|
||||
"StorageMetrics",
|
||||
self->thisServerID,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
self->thisServerID.toString() + "/StorageMetrics",
|
||||
[self = self](TraceEvent& te) {
|
||||
te.detail("StorageEngine", self->storage.getKeyValueStoreType().toString());
|
||||
te.detail("Tag", self->tag.toString());
|
||||
StorageBytes sb = self->storage.getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sb.used);
|
||||
te.detail("KvstoreBytesFree", sb.free);
|
||||
te.detail("KvstoreBytesAvailable", sb.available);
|
||||
te.detail("KvstoreBytesTotal", sb.total);
|
||||
te.detail("KvstoreBytesTemp", sb.temp);
|
||||
if (self->isTss()) {
|
||||
te.detail("TSSPairID", self->tssPairID);
|
||||
te.detail("TSSJointID",
|
||||
UID(self->thisServerID.first() ^ self->tssPairID.get().first(),
|
||||
self->thisServerID.second() ^ self->tssPairID.get().second()));
|
||||
} else if (self->isSSWithTSSPair()) {
|
||||
te.detail("SSPairID", self->ssPairID);
|
||||
te.detail("TSSJointID",
|
||||
UID(self->thisServerID.first() ^ self->ssPairID.get().first(),
|
||||
self->thisServerID.second() ^ self->ssPairID.get().second()));
|
||||
}
|
||||
}));
|
||||
|
||||
loop {
|
||||
choose {
|
||||
when(state WaitMetricsRequest req = waitNext(ssi.waitMetrics.getFuture())) {
|
||||
if (!req.tenantInfo.present() && !self->isReadable(req.keys)) {
|
||||
CODE_PROBE(true, "waitMetrics immediate wrong_shard_server()");
|
||||
self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
|
||||
} else {
|
||||
self->actors.add(waitMetricsTenantAware(self, req));
|
||||
}
|
||||
}
|
||||
when(SplitMetricsRequest req = waitNext(ssi.splitMetrics.getFuture())) {
|
||||
if (!self->isReadable(req.keys)) {
|
||||
CODE_PROBE(true, "splitMetrics immediate wrong_shard_server()");
|
||||
self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
|
||||
} else {
|
||||
self->metrics.splitMetrics(req);
|
||||
}
|
||||
}
|
||||
when(GetStorageMetricsRequest req = waitNext(ssi.getStorageMetrics.getFuture())) {
|
||||
StorageBytes sb = self->storage.getStorageBytes();
|
||||
self->metrics.getStorageMetrics(
|
||||
req, sb, self->counters.bytesInput.getRate(), self->versionLag, self->lastUpdate);
|
||||
}
|
||||
when(ReadHotSubRangeRequest req = waitNext(ssi.getReadHotRanges.getFuture())) {
|
||||
if (!self->isReadable(req.keys)) {
|
||||
CODE_PROBE(true, "readHotSubRanges immediate wrong_shard_server()", probe::decoration::rare);
|
||||
self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
|
||||
} else {
|
||||
self->metrics.getReadHotRanges(req);
|
||||
}
|
||||
}
|
||||
when(SplitRangeRequest req = waitNext(ssi.getRangeSplitPoints.getFuture())) {
|
||||
if (!self->isReadable(req.keys)) {
|
||||
CODE_PROBE(true, "getSplitPoints immediate wrong_shard_server()");
|
||||
self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
|
||||
} else {
|
||||
self->getSplitPoints(req);
|
||||
}
|
||||
}
|
||||
when(wait(doPollMetrics)) {
|
||||
self->metrics.poll();
|
||||
doPollMetrics = delay(SERVER_KNOBS->STORAGE_SERVER_POLL_METRICS_DELAY);
|
||||
}
|
||||
}
|
||||
}
|
||||
wait(serveStorageMetricsRequests(self, ssi));
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> logLongByteSampleRecovery(Future<Void> recovery) {
|
||||
|
|
|
@ -2267,7 +2267,25 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
|||
CODE_PROBE(true, "Recruited while already a blob migrator.");
|
||||
} else {
|
||||
startRole(Role::BLOB_MIGRATOR, recruited.id(), interf.id());
|
||||
DUMPTOKEN(recruited.waitFailure);
|
||||
DUMPTOKEN(recruited.haltBlobMigrator);
|
||||
DUMPTOKEN(recruited.ssi.getValue);
|
||||
DUMPTOKEN(recruited.ssi.getKey);
|
||||
DUMPTOKEN(recruited.ssi.getKeyValues);
|
||||
DUMPTOKEN(recruited.ssi.getMappedKeyValues);
|
||||
DUMPTOKEN(recruited.ssi.getShardState);
|
||||
DUMPTOKEN(recruited.ssi.waitMetrics);
|
||||
DUMPTOKEN(recruited.ssi.splitMetrics);
|
||||
DUMPTOKEN(recruited.ssi.getReadHotRanges);
|
||||
DUMPTOKEN(recruited.ssi.getRangeSplitPoints);
|
||||
DUMPTOKEN(recruited.ssi.getStorageMetrics);
|
||||
DUMPTOKEN(recruited.ssi.waitFailure);
|
||||
DUMPTOKEN(recruited.ssi.getQueuingMetrics);
|
||||
DUMPTOKEN(recruited.ssi.getKeyValueStoreType);
|
||||
DUMPTOKEN(recruited.ssi.watchValue);
|
||||
DUMPTOKEN(recruited.ssi.getKeyValuesStream);
|
||||
DUMPTOKEN(recruited.ssi.changeFeedStream);
|
||||
DUMPTOKEN(recruited.ssi.changeFeedPop);
|
||||
DUMPTOKEN(recruited.ssi.changeFeedVersionUpdate);
|
||||
|
||||
Future<Void> blobMigratorProcess = blobMigrator(recruited, dbInfo);
|
||||
errorForwarders.add(forwardError(errors,
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
|
@ -95,6 +96,7 @@ struct AtomicRestoreWorkload : TestWorkload {
|
|||
TraceEvent("AtomicRestore_Start").detail("UsePartitionedLog", self->usePartitionedLogs);
|
||||
|
||||
state std::string backupContainer = "file://simfdb/backups/";
|
||||
state DatabaseConfiguration conf = wait(getDatabaseConfiguration(cx));
|
||||
try {
|
||||
wait(backupAgent.submitBackup(cx,
|
||||
StringRef(backupContainer),
|
||||
|
@ -103,7 +105,8 @@ struct AtomicRestoreWorkload : TestWorkload {
|
|||
deterministicRandom()->randomInt(0, 100),
|
||||
BackupAgentBase::getDefaultTagName(),
|
||||
self->backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
conf.tenantMode != TenantMode::OPTIONAL_TENANT,
|
||||
StopWhenDone::False,
|
||||
self->usePartitionedLogs));
|
||||
} catch (Error& e) {
|
||||
|
|
|
@ -215,7 +215,7 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
|
||||
state std::string backupContainer = "file://simfdb/backups/";
|
||||
state Future<Void> status = statusLoop(cx, tag.toString());
|
||||
|
||||
state DatabaseConfiguration configuration = wait(getDatabaseConfiguration(cx));
|
||||
try {
|
||||
wait(backupAgent->submitBackup(cx,
|
||||
StringRef(backupContainer),
|
||||
|
@ -224,7 +224,8 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
deterministicRandom()->randomInt(0, 100),
|
||||
tag.toString(),
|
||||
backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
configuration.tenantMode != TenantMode::OPTIONAL_TENANT,
|
||||
StopWhenDone{ !stopDifferentialDelay },
|
||||
self->usePartitionedLogs));
|
||||
} catch (Error& e) {
|
||||
|
@ -474,6 +475,7 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
// Occasionally start yet another backup that might still be running when we restore
|
||||
if (!self->locked && BUGGIFY) {
|
||||
TraceEvent("BARW_SubmitBackup2", randomID).detail("Tag", printable(self->backupTag));
|
||||
state DatabaseConfiguration configuration = wait(getDatabaseConfiguration(cx));
|
||||
try {
|
||||
// Note the "partitionedLog" must be false, because we change
|
||||
// the configuration to disable backup workers before restore.
|
||||
|
@ -484,7 +486,8 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
deterministicRandom()->randomInt(0, 100),
|
||||
self->backupTag.toString(),
|
||||
self->backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
configuration.tenantMode != TenantMode::OPTIONAL_TENANT,
|
||||
StopWhenDone::True,
|
||||
UsePartitionedLog::False);
|
||||
} catch (Error& e) {
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/ReadYourWrites.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
|
@ -331,7 +332,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
|
||||
state std::string backupContainer = "file://simfdb/backups/";
|
||||
state Future<Void> status = statusLoop(cx, tag.toString());
|
||||
|
||||
state DatabaseConfiguration configuration = wait(getDatabaseConfiguration(cx));
|
||||
try {
|
||||
wait(backupAgent->submitBackup(cx,
|
||||
StringRef(backupContainer),
|
||||
|
@ -340,7 +341,8 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
deterministicRandom()->randomInt(0, 2000),
|
||||
tag.toString(),
|
||||
backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
configuration.tenantMode != TenantMode::OPTIONAL_TENANT,
|
||||
StopWhenDone{ !stopDifferentialDelay },
|
||||
UsePartitionedLog::False,
|
||||
IncrementalBackupOnly::False,
|
||||
|
@ -515,6 +517,42 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
return Void();
|
||||
}
|
||||
|
||||
// Clears every range in `systemRestoreRanges` and then restores exactly those ranges from
// `lastBackupContainer`, waiting for the restore to complete before returning.
//
// cx                   - database that is both cleared and restored into (passed twice to restore()).
// self                 - workload supplying backupTag, locked and encryptionKeyFileName.
// backupAgent          - agent used to run the restore.
// targetVersion        - version passed through to restore().
// lastBackupContainer  - source of the backup URL and proxy.
// systemRestoreRanges  - ranges to clear and restore; the caller builds these from the system-key portion
//                        of the workload's restore ranges.
ACTOR static Future<Void> clearAndRestoreSystemKeys(Database cx,
|
||||
BackupAndRestoreCorrectnessWorkload* self,
|
||||
FileBackupAgent* backupAgent,
|
||||
Version targetVersion,
|
||||
Reference<IBackupContainer> lastBackupContainer,
|
||||
Standalone<VectorRef<KeyRangeRef>> systemRestoreRanges) {
|
||||
// restore system keys before restoring any other ranges
|
||||
// Step 1: clear all requested ranges in one read-your-writes transaction.
wait(runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
|
||||
// The ranges being cleared are system keys, so the transaction needs this option to touch them.
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
for (auto& range : systemRestoreRanges)
|
||||
tr->clear(range);
|
||||
return Void();
|
||||
}));
|
||||
// Step 2: restore under a dedicated tag, the workload's backup tag with "_system" appended.
state Standalone<StringRef> restoreTag(self->backupTag.toString() + "_system");
|
||||
printf("BackupCorrectness, backupAgent.restore is called for tag:%s\n", restoreTag.toString().c_str());
|
||||
// WaitForComplete::True makes this wait cover the whole restore, not just its submission.
wait(success(backupAgent->restore(cx,
|
||||
cx,
|
||||
restoreTag,
|
||||
KeyRef(lastBackupContainer->getURL()),
|
||||
lastBackupContainer->getProxy(),
|
||||
systemRestoreRanges,
|
||||
WaitForComplete::True,
|
||||
targetVersion,
|
||||
Verbose::True,
|
||||
Key(),
|
||||
Key(),
|
||||
self->locked,
|
||||
UnlockDB::True,
|
||||
OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly::False,
|
||||
::invalidVersion,
|
||||
self->encryptionKeyFileName)));
|
||||
printf("BackupCorrectness, backupAgent.restore finished for tag:%s\n", restoreTag.toString().c_str());
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> _start(Database cx, BackupAndRestoreCorrectnessWorkload* self) {
|
||||
state FileBackupAgent backupAgent;
|
||||
state Future<Void> extraBackup;
|
||||
|
@ -593,6 +631,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
// Occasionally start yet another backup that might still be running when we restore
|
||||
if (!self->locked && BUGGIFY) {
|
||||
TraceEvent("BARW_SubmitBackup2", randomID).detail("Tag", printable(self->backupTag));
|
||||
state DatabaseConfiguration configuration = wait(getDatabaseConfiguration(cx));
|
||||
try {
|
||||
extraBackup = backupAgent.submitBackup(cx,
|
||||
"file://simfdb/backups/"_sr,
|
||||
|
@ -601,7 +640,8 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
deterministicRandom()->randomInt(0, 100),
|
||||
self->backupTag.toString(),
|
||||
self->backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
configuration.tenantMode != TenantMode::OPTIONAL_TENANT,
|
||||
StopWhenDone::True);
|
||||
} catch (Error& e) {
|
||||
TraceEvent("BARW_SubmitBackup2Exception", randomID)
|
||||
|
@ -638,7 +678,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
lastBackupContainer->getEncryptionKeyFileName());
|
||||
BackupDescription desc = wait(container->describeBackup());
|
||||
|
||||
Version targetVersion = -1;
|
||||
state Version targetVersion = -1;
|
||||
if (desc.maxRestorableVersion.present()) {
|
||||
if (deterministicRandom()->random01() < 0.1) {
|
||||
targetVersion = desc.minRestorableVersion.get();
|
||||
|
@ -656,6 +696,32 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
state std::vector<Standalone<StringRef>> restoreTags;
|
||||
state bool multipleRangesInOneTag = false;
|
||||
state int restoreIndex = 0;
|
||||
// make sure system keys are not present in the restoreRanges as they will get restored first separately
|
||||
// from the rest
|
||||
Standalone<VectorRef<KeyRangeRef>> modifiedRestoreRanges;
|
||||
Standalone<VectorRef<KeyRangeRef>> systemRestoreRanges;
|
||||
for (int i = 0; i < self->restoreRanges.size(); ++i) {
|
||||
if (!SERVER_KNOBS->ENABLE_ENCRYPTION ||
|
||||
!self->restoreRanges[i].intersects(getSystemBackupRanges())) {
|
||||
modifiedRestoreRanges.push_back_deep(modifiedRestoreRanges.arena(), self->restoreRanges[i]);
|
||||
} else {
|
||||
KeyRangeRef normalKeyRange = self->restoreRanges[i] & normalKeys;
|
||||
KeyRangeRef systemKeyRange = self->restoreRanges[i] & systemKeys;
|
||||
if (!normalKeyRange.empty()) {
|
||||
modifiedRestoreRanges.push_back_deep(modifiedRestoreRanges.arena(), normalKeyRange);
|
||||
}
|
||||
if (!systemKeyRange.empty()) {
|
||||
systemRestoreRanges.push_back_deep(systemRestoreRanges.arena(), systemKeyRange);
|
||||
}
|
||||
}
|
||||
}
|
||||
self->restoreRanges = modifiedRestoreRanges;
|
||||
if (!systemRestoreRanges.empty()) {
|
||||
// We are able to restore system keys first since we restore an entire cluster at once rather than
|
||||
// partial key ranges.
|
||||
wait(clearAndRestoreSystemKeys(
|
||||
cx, self, &backupAgent, targetVersion, lastBackupContainer, systemRestoreRanges));
|
||||
}
|
||||
if (deterministicRandom()->random01() < 0.5) {
|
||||
for (restoreIndex = 0; restoreIndex < self->restoreRanges.size(); restoreIndex++) {
|
||||
auto range = self->restoreRanges[restoreIndex];
|
||||
|
@ -703,6 +769,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
Key(),
|
||||
Key(),
|
||||
self->locked,
|
||||
UnlockDB::True,
|
||||
OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly::False,
|
||||
::invalidVersion,
|
||||
|
@ -735,6 +802,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
|
|||
Key(),
|
||||
Key(),
|
||||
self->locked,
|
||||
UnlockDB::True,
|
||||
OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly::False,
|
||||
::invalidVersion,
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/workloads/BlobStoreWorkload.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
|
@ -57,6 +58,7 @@ struct BackupToBlobWorkload : TestWorkload {
|
|||
addDefaultBackupRanges(backupRanges);
|
||||
|
||||
wait(delay(self->backupAfter));
|
||||
state DatabaseConfiguration configuration = wait(getDatabaseConfiguration(cx));
|
||||
wait(backupAgent.submitBackup(cx,
|
||||
self->backupURL,
|
||||
{},
|
||||
|
@ -64,7 +66,8 @@ struct BackupToBlobWorkload : TestWorkload {
|
|||
self->snapshotInterval,
|
||||
self->backupTag.toString(),
|
||||
backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION));
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
configuration.tenantMode != TenantMode::OPTIONAL_TENANT));
|
||||
EBackupState backupStatus = wait(backupAgent.waitBackup(cx, self->backupTag.toString(), StopWhenDone::True));
|
||||
TraceEvent("BackupToBlob_BackupStatus").detail("Status", BackupAgentBase::getStateText(backupStatus));
|
||||
return Void();
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/ClusterConnectionMemoryRecord.h"
|
||||
#include "fdbclient/TenantManagement.actor.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbserver/workloads/BulkSetup.actor.h"
|
||||
#include "flow/ApiVersion.h"
|
||||
|
@ -667,10 +668,47 @@ struct BackupToDBCorrectnessWorkload : TestWorkload {
|
|||
// wait(diffRanges(self->backupRanges, self->backupPrefix, cx, self->extraDB));
|
||||
|
||||
state Standalone<VectorRef<KeyRangeRef>> restoreRange;
|
||||
state Standalone<VectorRef<KeyRangeRef>> systemRestoreRange;
|
||||
for (auto r : self->backupRanges) {
|
||||
restoreRange.push_back_deep(
|
||||
restoreRange.arena(),
|
||||
KeyRangeRef(r.begin.withPrefix(self->backupPrefix), r.end.withPrefix(self->backupPrefix)));
|
||||
if (!SERVER_KNOBS->ENABLE_ENCRYPTION || !r.intersects(getSystemBackupRanges())) {
|
||||
restoreRange.push_back_deep(
|
||||
restoreRange.arena(),
|
||||
KeyRangeRef(r.begin.withPrefix(self->backupPrefix), r.end.withPrefix(self->backupPrefix)));
|
||||
} else {
|
||||
KeyRangeRef normalKeyRange = r & normalKeys;
|
||||
KeyRangeRef systemKeyRange = r & systemKeys;
|
||||
if (!normalKeyRange.empty()) {
|
||||
restoreRange.push_back_deep(restoreRange.arena(),
|
||||
KeyRangeRef(normalKeyRange.begin.withPrefix(self->backupPrefix),
|
||||
normalKeyRange.end.withPrefix(self->backupPrefix)));
|
||||
}
|
||||
if (!systemKeyRange.empty()) {
|
||||
systemRestoreRange.push_back_deep(systemRestoreRange.arena(), systemKeyRange);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// restore system keys first before restoring user data
|
||||
if (!systemRestoreRange.empty()) {
|
||||
state Key systemRestoreTag = "restore_system"_sr;
|
||||
try {
|
||||
wait(restoreTool.submitBackup(cx,
|
||||
systemRestoreTag,
|
||||
systemRestoreRange,
|
||||
StopWhenDone::True,
|
||||
StringRef(),
|
||||
self->backupPrefix,
|
||||
self->locked,
|
||||
DatabaseBackupAgent::PreBackupAction::CLEAR));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("BARW_DoBackupSubmitBackupException", randomID)
|
||||
.error(e)
|
||||
.detail("Tag", printable(systemRestoreTag));
|
||||
if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate)
|
||||
throw;
|
||||
}
|
||||
wait(success(restoreTool.waitBackup(cx, systemRestoreTag)));
|
||||
wait(restoreTool.unlockBackup(cx, systemRestoreTag));
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
@ -105,16 +105,6 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
// Convenience wrapper that turns blob storage on or off for `range`.
// active == true  -> calls cx->blobbifyRange(range, tenantName)
// active == false -> calls cx->unblobbifyRange(range, tenantName)
// Returns the bool produced by whichever call was made.
ACTOR Future<bool> setRange(Database cx, KeyRange range, bool active, Optional<TenantName> tenantName) {
|
||||
if (active) {
|
||||
bool success = wait(cx->blobbifyRange(range, tenantName));
|
||||
return success;
|
||||
} else {
|
||||
bool success = wait(cx->unblobbifyRange(range, tenantName));
|
||||
return success;
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> registerNewRange(Database cx, BlobGranuleRangesWorkload* self, Optional<TenantName> tenantName) {
|
||||
std::string nextRangeKey = "R_" + self->newKey();
|
||||
state KeyRange range(KeyRangeRef(StringRef(nextRangeKey), strinc(StringRef(nextRangeKey))));
|
||||
|
@ -124,8 +114,7 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
|
||||
// don't put in active ranges until AFTER set range command succeeds, to avoid checking a range that maybe
|
||||
// wasn't initialized
|
||||
bool success =
|
||||
wait(self->setRange(cx, range, true, tenantName.present() ? tenantName.get() : self->tenantName));
|
||||
bool success = wait(cx->blobbifyRange(range, tenantName.present() ? tenantName.get() : self->tenantName));
|
||||
ASSERT(success);
|
||||
|
||||
if (BGRW_DEBUG) {
|
||||
|
@ -163,7 +152,7 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
Key purgeKey = wait(self->versionedForcePurge(cx, range, self->tenantName));
|
||||
wait(cx->waitPurgeGranulesComplete(purgeKey));
|
||||
}
|
||||
bool success = wait(self->setRange(cx, range, false, self->tenantName));
|
||||
bool success = wait(cx->unblobbifyRange(range, self->tenantName));
|
||||
ASSERT(success);
|
||||
|
||||
if (BGRW_DEBUG) {
|
||||
|
@ -356,7 +345,7 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
// tear down range at end
|
||||
Key purgeKey = wait(self->versionedForcePurge(cx, range, self->tenantName));
|
||||
wait(cx->waitPurgeGranulesComplete(purgeKey));
|
||||
bool success = wait(self->setRange(cx, range, false, self->tenantName));
|
||||
bool success = wait(cx->unblobbifyRange(range, self->tenantName));
|
||||
ASSERT(success);
|
||||
|
||||
if (BGRW_DEBUG) {
|
||||
|
@ -373,7 +362,7 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
if (BGRW_DEBUG) {
|
||||
fmt::print("VerifyRangeUnit: [{0} - {1})\n", range.begin.printable(), range.end.printable());
|
||||
}
|
||||
bool setSuccess = wait(self->setRange(cx, activeRange, true, self->tenantName));
|
||||
bool setSuccess = wait(cx->blobbifyRange(activeRange, self->tenantName));
|
||||
ASSERT(setSuccess);
|
||||
wait(self->checkRange(cx, self, activeRange, true));
|
||||
|
||||
|
@ -426,7 +415,7 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
for (i = 0; i < rangeCount; i++) {
|
||||
state KeyRange subRange(KeyRangeRef(boundaries[i], boundaries[i + 1]));
|
||||
if (i != rangeToNotBlobbify) {
|
||||
bool setSuccess = wait(self->setRange(cx, subRange, true, self->tenantName));
|
||||
bool setSuccess = wait(cx->blobbifyRange(subRange, self->tenantName));
|
||||
ASSERT(setSuccess);
|
||||
wait(self->checkRange(cx, self, subRange, true));
|
||||
} else {
|
||||
|
@ -473,7 +462,7 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
ACTOR Future<Void> rangesMisalignedUnit(Database cx, BlobGranuleRangesWorkload* self, KeyRange range) {
|
||||
bool setSuccess = wait(self->setRange(cx, range, true, self->tenantName));
|
||||
bool setSuccess = wait(cx->blobbifyRange(range, self->tenantName));
|
||||
ASSERT(setSuccess);
|
||||
state KeyRange subRange(KeyRangeRef(range.begin.withSuffix("A"_sr), range.begin.withSuffix("B"_sr)));
|
||||
|
||||
|
@ -526,42 +515,42 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
|
||||
// unblobbifying range that already doesn't exist should be no-op
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
bool unblobbifyStartSuccess = wait(self->setRange(cx, activeRange, false, self->tenantName));
|
||||
bool unblobbifyStartSuccess = wait(cx->blobbifyRange(activeRange, self->tenantName));
|
||||
ASSERT(unblobbifyStartSuccess);
|
||||
}
|
||||
|
||||
bool success = wait(self->setRange(cx, activeRange, true, self->tenantName));
|
||||
bool success = wait(cx->blobbifyRange(activeRange, self->tenantName));
|
||||
ASSERT(success);
|
||||
wait(self->checkRange(cx, self, activeRange, true));
|
||||
|
||||
// check that re-blobbifying same range is successful
|
||||
bool retrySuccess = wait(self->setRange(cx, activeRange, true, self->tenantName));
|
||||
bool retrySuccess = wait(cx->blobbifyRange(activeRange, self->tenantName));
|
||||
ASSERT(retrySuccess);
|
||||
wait(self->checkRange(cx, self, activeRange, true));
|
||||
|
||||
// check that blobbifying range that overlaps but does not match existing blob range fails
|
||||
bool fail1 = wait(self->setRange(cx, range, true, self->tenantName));
|
||||
bool fail1 = wait(cx->blobbifyRange(range, self->tenantName));
|
||||
ASSERT(!fail1);
|
||||
|
||||
bool fail2 = wait(self->setRange(cx, KeyRangeRef(range.begin, activeRange.end), true, self->tenantName));
|
||||
bool fail2 = wait(cx->blobbifyRange(KeyRangeRef(range.begin, activeRange.end), self->tenantName));
|
||||
ASSERT(!fail2);
|
||||
|
||||
bool fail3 = wait(self->setRange(cx, KeyRangeRef(activeRange.begin, range.end), true, self->tenantName));
|
||||
bool fail3 = wait(cx->blobbifyRange(KeyRangeRef(activeRange.begin, range.end), self->tenantName));
|
||||
ASSERT(!fail3);
|
||||
|
||||
bool fail4 = wait(self->setRange(cx, KeyRangeRef(range.begin, middleKey), true, self->tenantName));
|
||||
bool fail4 = wait(cx->blobbifyRange(KeyRangeRef(range.begin, middleKey), self->tenantName));
|
||||
ASSERT(!fail4);
|
||||
|
||||
bool fail5 = wait(self->setRange(cx, KeyRangeRef(middleKey, range.end), true, self->tenantName));
|
||||
bool fail5 = wait(cx->blobbifyRange(KeyRangeRef(middleKey, range.end), self->tenantName));
|
||||
ASSERT(!fail5);
|
||||
|
||||
bool fail6 = wait(self->setRange(cx, KeyRangeRef(activeRange.begin, middleKey), true, self->tenantName));
|
||||
bool fail6 = wait(cx->blobbifyRange(KeyRangeRef(activeRange.begin, middleKey), self->tenantName));
|
||||
ASSERT(!fail6);
|
||||
|
||||
bool fail7 = wait(self->setRange(cx, KeyRangeRef(middleKey, activeRange.end), true, self->tenantName));
|
||||
bool fail7 = wait(cx->blobbifyRange(KeyRangeRef(middleKey, activeRange.end), self->tenantName));
|
||||
ASSERT(!fail7);
|
||||
|
||||
bool fail8 = wait(self->setRange(cx, KeyRangeRef(middleKey, middleKey2), true, self->tenantName));
|
||||
bool fail8 = wait(cx->blobbifyRange(KeyRangeRef(middleKey, middleKey2), self->tenantName));
|
||||
ASSERT(!fail8);
|
||||
|
||||
{
|
||||
|
@ -582,13 +571,14 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
// tear down + check that un-blobbifying at a non-aligned range also doesn't work
|
||||
Key purgeKey = wait(self->versionedForcePurge(cx, activeRange, self->tenantName));
|
||||
state Version purgeVersion = deterministicRandom()->coinflip() ? latestVersion : 1;
|
||||
state KeyRangeRef purgeRange = deterministicRandom()->coinflip() ? activeRange : range;
|
||||
Key purgeKey = wait(cx->purgeBlobGranules(purgeRange, purgeVersion, self->tenantName, true));
|
||||
wait(cx->waitPurgeGranulesComplete(purgeKey));
|
||||
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
// force purge again and ensure it is idempotent
|
||||
Key purgeKeyAgain = wait(cx->purgeBlobGranules(activeRange, 1, self->tenantName, true));
|
||||
Key purgeKeyAgain = wait(cx->purgeBlobGranules(purgeRange, purgeVersion, self->tenantName, true));
|
||||
wait(cx->waitPurgeGranulesComplete(purgeKeyAgain));
|
||||
}
|
||||
}
|
||||
|
@ -600,41 +590,38 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
ASSERT(blobRanges.size() == 1);
|
||||
ASSERT(blobRanges[0] == activeRange);
|
||||
|
||||
bool unblobbifyFail1 = wait(self->setRange(cx, range, false, self->tenantName));
|
||||
bool unblobbifyFail1 = wait(cx->unblobbifyRange(range, self->tenantName));
|
||||
ASSERT(!unblobbifyFail1);
|
||||
|
||||
bool unblobbifyFail2 =
|
||||
wait(self->setRange(cx, KeyRangeRef(range.begin, activeRange.end), false, self->tenantName));
|
||||
wait(cx->unblobbifyRange(KeyRangeRef(range.begin, activeRange.end), self->tenantName));
|
||||
ASSERT(!unblobbifyFail2);
|
||||
|
||||
bool unblobbifyFail3 =
|
||||
wait(self->setRange(cx, KeyRangeRef(activeRange.begin, range.end), false, self->tenantName));
|
||||
wait(cx->unblobbifyRange(KeyRangeRef(activeRange.begin, range.end), self->tenantName));
|
||||
ASSERT(!unblobbifyFail3);
|
||||
|
||||
bool unblobbifyFail4 =
|
||||
wait(self->setRange(cx, KeyRangeRef(activeRange.begin, middleKey), false, self->tenantName));
|
||||
wait(cx->unblobbifyRange(KeyRangeRef(activeRange.begin, middleKey), self->tenantName));
|
||||
ASSERT(!unblobbifyFail4);
|
||||
|
||||
bool unblobbifyFail5 =
|
||||
wait(self->setRange(cx, KeyRangeRef(middleKey, activeRange.end), false, self->tenantName));
|
||||
bool unblobbifyFail5 = wait(cx->unblobbifyRange(KeyRangeRef(middleKey, activeRange.end), self->tenantName));
|
||||
ASSERT(!unblobbifyFail5);
|
||||
|
||||
bool unblobbifyFail6 =
|
||||
wait(self->setRange(cx, KeyRangeRef(activeRange.begin, middleKey), false, self->tenantName));
|
||||
wait(cx->unblobbifyRange(KeyRangeRef(activeRange.begin, middleKey), self->tenantName));
|
||||
ASSERT(!unblobbifyFail6);
|
||||
|
||||
bool unblobbifyFail7 =
|
||||
wait(self->setRange(cx, KeyRangeRef(middleKey, activeRange.end), false, self->tenantName));
|
||||
bool unblobbifyFail7 = wait(cx->unblobbifyRange(KeyRangeRef(middleKey, activeRange.end), self->tenantName));
|
||||
ASSERT(!unblobbifyFail7);
|
||||
|
||||
bool unblobbifyFail8 =
|
||||
wait(self->setRange(cx, KeyRangeRef(middleKey, middleKey2), false, self->tenantName));
|
||||
bool unblobbifyFail8 = wait(cx->unblobbifyRange(KeyRangeRef(middleKey, middleKey2), self->tenantName));
|
||||
ASSERT(!unblobbifyFail8);
|
||||
|
||||
bool unblobbifySuccess = wait(self->setRange(cx, activeRange, true, self->tenantName));
|
||||
bool unblobbifySuccess = wait(cx->unblobbifyRange(activeRange, self->tenantName));
|
||||
ASSERT(unblobbifySuccess);
|
||||
|
||||
bool unblobbifySuccessAgain = wait(self->setRange(cx, activeRange, true, self->tenantName));
|
||||
bool unblobbifySuccessAgain = wait(cx->unblobbifyRange(activeRange, self->tenantName));
|
||||
ASSERT(unblobbifySuccessAgain);
|
||||
}
|
||||
|
||||
|
@ -642,7 +629,7 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
ACTOR Future<Void> reBlobbifyUnit(Database cx, BlobGranuleRangesWorkload* self, KeyRange range) {
|
||||
bool setSuccess = wait(self->setRange(cx, range, true, self->tenantName));
|
||||
bool setSuccess = wait(cx->blobbifyRange(range, self->tenantName));
|
||||
ASSERT(setSuccess);
|
||||
wait(self->checkRange(cx, self, range, true));
|
||||
|
||||
|
@ -651,11 +638,11 @@ struct BlobGranuleRangesWorkload : TestWorkload {
|
|||
wait(cx->waitPurgeGranulesComplete(purgeKey));
|
||||
wait(self->checkRange(cx, self, range, false));
|
||||
|
||||
bool unsetSuccess = wait(self->setRange(cx, range, false, self->tenantName));
|
||||
bool unsetSuccess = wait(cx->unblobbifyRange(range, self->tenantName));
|
||||
ASSERT(unsetSuccess);
|
||||
wait(self->checkRange(cx, self, range, false));
|
||||
|
||||
bool reSetSuccess = wait(self->setRange(cx, range, true, self->tenantName));
|
||||
bool reSetSuccess = wait(cx->blobbifyRange(range, self->tenantName));
|
||||
ASSERT(reSetSuccess);
|
||||
wait(self->checkRange(cx, self, range, true));
|
||||
|
||||
|
|
|
@ -305,6 +305,8 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
|
|||
state Version prevPurgeVersion = -1;
|
||||
state UID dbgId = debugRandom()->randomUniqueID();
|
||||
state Version newPurgeVersion = 0;
|
||||
// usually we want randomness to verify maximum data, but sometimes hotspotting a subset is good too
|
||||
state bool pickGranuleUniform = deterministicRandom()->random01() < 0.1;
|
||||
|
||||
TraceEvent("BlobGranuleVerifierStart");
|
||||
if (BGV_DEBUG) {
|
||||
|
@ -458,7 +460,13 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
// pick a random range
|
||||
int rIndex = deterministicRandom()->randomInt(0, self->granuleRanges.get().size());
|
||||
size_t granuleCount = self->granuleRanges.get().size();
|
||||
size_t rIndex;
|
||||
if (pickGranuleUniform) {
|
||||
rIndex = deterministicRandom()->randomInt(0, granuleCount);
|
||||
} else {
|
||||
rIndex = deterministicRandom()->randomSkewedUInt32(0, granuleCount);
|
||||
}
|
||||
state KeyRange range = self->granuleRanges.get()[rIndex];
|
||||
|
||||
state std::pair<RangeResult, Version> fdb = wait(readFromFDB(cx, range));
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include "flow/IRateControl.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/StorageMetrics.actor.h"
|
||||
#include "fdbserver/DataDistribution.actor.h"
|
||||
#include "fdbserver/QuietDatabase.h"
|
||||
#include "fdbserver/TSSMappingUtil.actor.h"
|
||||
|
@ -394,6 +394,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
state Standalone<VectorRef<KeyValueRef>>
|
||||
serverList; // "\xff/serverList/[[serverID]]" := "[[StorageServerInterface]]"
|
||||
state Standalone<VectorRef<KeyValueRef>> serverTag; // "\xff/serverTag/[[serverID]]" = "[[Tag]]"
|
||||
state bool testResult = true;
|
||||
|
||||
std::vector<Future<bool>> cacheResultsPromise;
|
||||
cacheResultsPromise.push_back(self->fetchKeyValuesFromSS(cx, self, storageCacheKeys, cacheKeyPromise, true));
|
||||
|
@ -581,7 +582,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
for (j = 0; j < keyValueFutures.size(); j++) {
|
||||
ErrorOr<GetKeyValuesReply> rangeResult = keyValueFutures[j].get();
|
||||
// if (rangeResult.isError()) {
|
||||
// throw rangeResult.getError();
|
||||
// throw rangeResult.getError();
|
||||
// }
|
||||
|
||||
// Compare the results with other storage servers
|
||||
|
@ -709,7 +710,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
.detail("MatchingKVPairs", matchingKVPairs);
|
||||
|
||||
self->testFailure("Data inconsistent", true);
|
||||
return false;
|
||||
testResult = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -755,7 +756,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
.detail("BytesRead", bytesReadInRange);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return testResult;
|
||||
}
|
||||
|
||||
// Directly fetch key/values from storage servers through GetKeyValuesRequest
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/ReadYourWrites.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
|
@ -150,6 +151,7 @@ struct IncrementalBackupWorkload : TestWorkload {
|
|||
|
||||
if (self->submitOnly) {
|
||||
TraceEvent("IBackupSubmitAttempt").log();
|
||||
state DatabaseConfiguration configuration = wait(getDatabaseConfiguration(cx));
|
||||
try {
|
||||
wait(self->backupAgent.submitBackup(cx,
|
||||
self->backupDir,
|
||||
|
@ -158,7 +160,8 @@ struct IncrementalBackupWorkload : TestWorkload {
|
|||
1e8,
|
||||
self->tag.toString(),
|
||||
backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
configuration.tenantMode != TenantMode::OPTIONAL_TENANT,
|
||||
StopWhenDone::False,
|
||||
UsePartitionedLog::False,
|
||||
IncrementalBackupOnly::True));
|
||||
|
@ -227,19 +230,56 @@ struct IncrementalBackupWorkload : TestWorkload {
|
|||
.detail("Size", containers.size())
|
||||
.detail("First", containers.front());
|
||||
state Key backupURL = Key(containers.front());
|
||||
|
||||
state Standalone<VectorRef<KeyRangeRef>> restoreRange;
|
||||
state Standalone<VectorRef<KeyRangeRef>> systemRestoreRange;
|
||||
for (auto r : backupRanges) {
|
||||
if (!SERVER_KNOBS->ENABLE_ENCRYPTION || !r.intersects(getSystemBackupRanges())) {
|
||||
restoreRange.push_back_deep(restoreRange.arena(), r);
|
||||
} else {
|
||||
KeyRangeRef normalKeyRange = r & normalKeys;
|
||||
KeyRangeRef systemKeyRange = r & systemKeys;
|
||||
if (!normalKeyRange.empty()) {
|
||||
restoreRange.push_back_deep(restoreRange.arena(), normalKeyRange);
|
||||
}
|
||||
if (!systemKeyRange.empty()) {
|
||||
systemRestoreRange.push_back_deep(systemRestoreRange.arena(), systemKeyRange);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!systemRestoreRange.empty()) {
|
||||
TraceEvent("IBackupSystemRestoreAttempt").detail("BeginVersion", beginVersion);
|
||||
wait(success(self->backupAgent.restore(cx,
|
||||
cx,
|
||||
"system_restore"_sr,
|
||||
backupURL,
|
||||
{},
|
||||
systemRestoreRange,
|
||||
WaitForComplete::True,
|
||||
invalidVersion,
|
||||
Verbose::True,
|
||||
Key(),
|
||||
Key(),
|
||||
LockDB::True,
|
||||
UnlockDB::True,
|
||||
OnlyApplyMutationLogs::True,
|
||||
InconsistentSnapshotOnly::False,
|
||||
beginVersion)));
|
||||
}
|
||||
TraceEvent("IBackupRestoreAttempt").detail("BeginVersion", beginVersion);
|
||||
wait(success(self->backupAgent.restore(cx,
|
||||
cx,
|
||||
Key(self->tag.toString()),
|
||||
backupURL,
|
||||
{},
|
||||
backupRanges,
|
||||
restoreRange,
|
||||
WaitForComplete::True,
|
||||
invalidVersion,
|
||||
Verbose::True,
|
||||
Key(),
|
||||
Key(),
|
||||
LockDB::True,
|
||||
UnlockDB::True,
|
||||
OnlyApplyMutationLogs::True,
|
||||
InconsistentSnapshotOnly::False,
|
||||
beginVersion)));
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
|
@ -113,14 +114,43 @@ struct RestoreBackupWorkload : TestWorkload {
|
|||
wait(delay(self->delayFor));
|
||||
wait(waitOnBackup(self, cx));
|
||||
wait(clearDatabase(cx));
|
||||
wait(success(self->backupAgent.restore(cx,
|
||||
cx,
|
||||
self->tag,
|
||||
Key(self->backupContainer->getURL()),
|
||||
self->backupContainer->getProxy(),
|
||||
WaitForComplete::True,
|
||||
::invalidVersion,
|
||||
Verbose::True)));
|
||||
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
|
||||
// restore system keys
|
||||
VectorRef<KeyRangeRef> systemBackupRanges = getSystemBackupRanges();
|
||||
state std::vector<Future<Version>> restores;
|
||||
for (int i = 0; i < systemBackupRanges.size(); i++) {
|
||||
restores.push_back((self->backupAgent.restore(cx,
|
||||
cx,
|
||||
"system_restore"_sr,
|
||||
Key(self->backupContainer->getURL()),
|
||||
self->backupContainer->getProxy(),
|
||||
WaitForComplete::True,
|
||||
::invalidVersion,
|
||||
Verbose::True,
|
||||
systemBackupRanges[i])));
|
||||
}
|
||||
waitForAll(restores);
|
||||
// restore non-system keys
|
||||
wait(success(self->backupAgent.restore(cx,
|
||||
cx,
|
||||
self->tag,
|
||||
Key(self->backupContainer->getURL()),
|
||||
self->backupContainer->getProxy(),
|
||||
WaitForComplete::True,
|
||||
::invalidVersion,
|
||||
Verbose::True,
|
||||
normalKeys)));
|
||||
} else {
|
||||
wait(success(self->backupAgent.restore(cx,
|
||||
cx,
|
||||
self->tag,
|
||||
Key(self->backupContainer->getURL()),
|
||||
self->backupContainer->getProxy(),
|
||||
WaitForComplete::True,
|
||||
::invalidVersion,
|
||||
Verbose::True)));
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
|
|
@ -18,9 +18,11 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/workloads/BlobStoreWorkload.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbserver/workloads/BulkSetup.actor.h"
|
||||
|
@ -52,13 +54,22 @@ struct RestoreFromBlobWorkload : TestWorkload {
|
|||
|
||||
ACTOR static Future<Void> _start(Database cx, RestoreFromBlobWorkload* self) {
|
||||
state FileBackupAgent backupAgent;
|
||||
state Standalone<VectorRef<KeyRangeRef>> restoreRanges;
|
||||
|
||||
addDefaultBackupRanges(restoreRanges);
|
||||
|
||||
wait(delay(self->restoreAfter));
|
||||
Version v = wait(
|
||||
backupAgent.restore(cx, {}, self->backupTag, self->backupURL, {}, restoreRanges, self->waitForComplete));
|
||||
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
|
||||
// restore system keys followed by user keys
|
||||
wait(success(backupAgent.restore(
|
||||
cx, {}, self->backupTag, self->backupURL, {}, getSystemBackupRanges(), self->waitForComplete)));
|
||||
Standalone<VectorRef<KeyRangeRef>> restoreRanges;
|
||||
restoreRanges.push_back_deep(restoreRanges.arena(), normalKeys);
|
||||
wait(success(backupAgent.restore(
|
||||
cx, {}, self->backupTag, self->backupURL, {}, restoreRanges, self->waitForComplete)));
|
||||
} else {
|
||||
Standalone<VectorRef<KeyRangeRef>> restoreRanges;
|
||||
addDefaultBackupRanges(restoreRanges);
|
||||
wait(success(backupAgent.restore(
|
||||
cx, {}, self->backupTag, self->backupURL, {}, restoreRanges, self->waitForComplete)));
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
|
|
@ -38,17 +38,17 @@ struct StorageQuotaWorkload : TestWorkload {
|
|||
wait(setStorageQuotaHelper(cx, "name2"_sr, 200));
|
||||
wait(setStorageQuotaHelper(cx, "name1"_sr, 300));
|
||||
|
||||
state Optional<uint64_t> quota1 = wait(getStorageQuotaHelper(cx, "name1"_sr));
|
||||
state Optional<int64_t> quota1 = wait(getStorageQuotaHelper(cx, "name1"_sr));
|
||||
ASSERT(quota1.present() && quota1.get() == 300);
|
||||
state Optional<uint64_t> quota2 = wait(getStorageQuotaHelper(cx, "name2"_sr));
|
||||
state Optional<int64_t> quota2 = wait(getStorageQuotaHelper(cx, "name2"_sr));
|
||||
ASSERT(quota2.present() && quota2.get() == 200);
|
||||
state Optional<uint64_t> quota3 = wait(getStorageQuotaHelper(cx, "name3"_sr));
|
||||
state Optional<int64_t> quota3 = wait(getStorageQuotaHelper(cx, "name3"_sr));
|
||||
ASSERT(!quota3.present());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> setStorageQuotaHelper(Database cx, StringRef tenantName, uint64_t quota) {
|
||||
ACTOR static Future<Void> setStorageQuotaHelper(Database cx, StringRef tenantName, int64_t quota) {
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
|
@ -61,11 +61,11 @@ struct StorageQuotaWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Optional<uint64_t>> getStorageQuotaHelper(Database cx, StringRef tenantName) {
|
||||
ACTOR static Future<Optional<int64_t>> getStorageQuotaHelper(Database cx, StringRef tenantName) {
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
state Optional<uint64_t> quota = wait(getStorageQuota(&tr, tenantName));
|
||||
state Optional<int64_t> quota = wait(getStorageQuota(&tr, tenantName));
|
||||
wait(tr.commit());
|
||||
return quota;
|
||||
} catch (Error& e) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
*/
|
||||
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/ReadYourWrites.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
|
@ -52,8 +53,9 @@ struct SubmitBackupWorkload : TestWorkload {
|
|||
|
||||
ACTOR static Future<Void> _start(SubmitBackupWorkload* self, Database cx) {
|
||||
wait(delay(self->delayFor));
|
||||
Standalone<VectorRef<KeyRangeRef>> backupRanges;
|
||||
state Standalone<VectorRef<KeyRangeRef>> backupRanges;
|
||||
addDefaultBackupRanges(backupRanges);
|
||||
state DatabaseConfiguration configuration = wait(getDatabaseConfiguration(cx));
|
||||
try {
|
||||
wait(self->backupAgent.submitBackup(cx,
|
||||
self->backupDir,
|
||||
|
@ -62,7 +64,8 @@ struct SubmitBackupWorkload : TestWorkload {
|
|||
self->snapshotInterval,
|
||||
self->tag.toString(),
|
||||
backupRanges,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION,
|
||||
SERVER_KNOBS->ENABLE_ENCRYPTION &&
|
||||
configuration.tenantMode != TenantMode::OPTIONAL_TENANT,
|
||||
self->stopWhenDone,
|
||||
UsePartitionedLog::False,
|
||||
self->incremental));
|
||||
|
|
|
@ -43,7 +43,7 @@ struct TagThrottleApiWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
Future<Void> start(Database const& cx) override {
|
||||
if (this->clientId != 0)
|
||||
if (SERVER_KNOBS->GLOBAL_TAG_THROTTLING || this->clientId != 0)
|
||||
return Void();
|
||||
return timeout(runThrottleApi(this, cx), testDuration, Void());
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* GlobalTagThrottling.actor.cpp
|
||||
* ThroughputQuota.actor.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
|
@ -23,42 +23,46 @@
|
|||
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
class GlobalTagThrottlingWorkload : public TestWorkload {
|
||||
// This workload sets the throughput quota of a tag during the setup phase
|
||||
class ThroughputQuotaWorkload : public TestWorkload {
|
||||
TransactionTag transactionTag;
|
||||
double reservedQuota{ 0.0 };
|
||||
double totalQuota{ 0.0 };
|
||||
|
||||
ACTOR static Future<Void> setup(GlobalTagThrottlingWorkload* self, Database cx) {
|
||||
ACTOR static Future<Void> setup(ThroughputQuotaWorkload* self, Database cx) {
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
TraceEvent("GlobalTagThrottlingWorkload_SettingTagQuota")
|
||||
.detail("Tag", self->transactionTag)
|
||||
TraceEvent("ThroughputQuotaWorkload_SettingTagQuota")
|
||||
.detail("Tag", printable(self->transactionTag))
|
||||
.detail("ReservedQuota", self->reservedQuota)
|
||||
.detail("TotalQuota", self->totalQuota);
|
||||
ThrottleApi::setTagQuota(tr, self->transactionTag, self->reservedQuota, self->totalQuota);
|
||||
wait(tr->commit());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
TraceEvent("GlobalTagThrottlingWorkload_SetupError").error(e);
|
||||
TraceEvent("ThroughputQuotaWorkload_SetupError").error(e);
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr auto NAME = "GlobalTagThrottling";
|
||||
explicit GlobalTagThrottlingWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
||||
static constexpr auto NAME = "ThroughputQuota";
|
||||
explicit ThroughputQuotaWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
||||
transactionTag = getOption(options, "transactionTag"_sr, "sampleTag"_sr);
|
||||
reservedQuota = getOption(options, "reservedQuota"_sr, 0.0);
|
||||
totalQuota = getOption(options, "totalQuota"_sr, 0.0);
|
||||
}
|
||||
|
||||
Future<Void> setup(Database const& cx) override { return clientId ? Void() : setup(this, cx); }
|
||||
Future<Void> setup(Database const& cx) override {
|
||||
DatabaseContext::debugUseTags = true;
|
||||
return clientId ? Void() : setup(this, cx);
|
||||
}
|
||||
Future<Void> start(Database const& cx) override { return Void(); }
|
||||
Future<bool> check(Database const& cx) override { return true; }
|
||||
void getMetrics(std::vector<PerfMetric>& m) override {}
|
||||
};
|
||||
|
||||
WorkloadFactory<GlobalTagThrottlingWorkload> GlobalTagThrottlingWorkloadFactory;
|
||||
WorkloadFactory<ThroughputQuotaWorkload> ThroughputQuotaWorkloadFactory;
|
|
@ -131,6 +131,7 @@ ERROR( please_reboot_kv_store, 1219, "Need to reboot the storage engine")
|
|||
ERROR( incompatible_software_version, 1220, "Current software does not support database format" )
|
||||
ERROR( audit_storage_failed, 1221, "Validate storage consistency operation failed" )
|
||||
ERROR( audit_storage_exceeded_request_limit, 1222, "Exceeded the max number of allowed concurrent audit storage requests" )
|
||||
ERROR( proxy_tag_throttled, 1223, "Exceeded maximum proxy tag throttling duration" )
|
||||
|
||||
// 15xx Platform errors
|
||||
ERROR( platform_error, 1500, "Platform error" )
|
||||
|
|
|
@ -178,13 +178,13 @@ RUN yum -y install \
|
|||
rm -rf /var/cache/yum
|
||||
|
||||
WORKDIR /tmp
|
||||
RUN curl -Ls https://amazon-eks.s3.amazonaws.com/1.19.6/2021-01-05/bin/linux/amd64/kubectl -o kubectl && \
|
||||
echo "08ff68159bbcb844455167abb1d0de75bbfe5ae1b051f81ab060a1988027868a kubectl" > kubectl.txt && \
|
||||
RUN curl -Ls https://s3.us-west-2.amazonaws.com/amazon-eks/1.22.6/2022-03-09/bin/linux/amd64/kubectl -o kubectl && \
|
||||
echo "860c3d37a5979491895767e7332404d28dc0d7797c7673c33df30ca80e215a07 kubectl" > kubectl.txt && \
|
||||
sha256sum --quiet -c kubectl.txt && \
|
||||
mv kubectl /usr/local/bin/kubectl && \
|
||||
chmod 755 /usr/local/bin/kubectl && \
|
||||
curl -Ls https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.2.43.zip -o "awscliv2.zip" && \
|
||||
echo "9a8b3c4e7f72bbcc55e341dce3af42479f2730c225d6d265ee6f9162cfdebdfd awscliv2.zip" > awscliv2.txt && \
|
||||
curl -Ls https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.7.34.zip -o "awscliv2.zip" && \
|
||||
echo "daf9253f0071b5cfee9532bc5220bedd7a5d29d4e0f92b42b9e3e4c496341e88 awscliv2.zip" > awscliv2.txt && \
|
||||
sha256sum --quiet -c awscliv2.txt && \
|
||||
unzip -qq awscliv2.zip && \
|
||||
./aws/install && \
|
||||
|
|
|
@ -53,13 +53,13 @@ RUN curl -Ls https://github.com/krallin/tini/releases/download/v0.19.0/tini-amd6
|
|||
mv tini /usr/bin/ && \
|
||||
rm -rf /tmp/*
|
||||
|
||||
RUN curl -Ls https://amazon-eks.s3.amazonaws.com/1.19.6/2021-01-05/bin/linux/amd64/kubectl -o kubectl && \
|
||||
echo "08ff68159bbcb844455167abb1d0de75bbfe5ae1b051f81ab060a1988027868a kubectl" > kubectl.txt && \
|
||||
RUN curl -Ls https://s3.us-west-2.amazonaws.com/amazon-eks/1.22.6/2022-03-09/bin/linux/amd64/kubectl -o kubectl && \
|
||||
echo "860c3d37a5979491895767e7332404d28dc0d7797c7673c33df30ca80e215a07 kubectl" > kubectl.txt && \
|
||||
sha256sum --quiet -c kubectl.txt && \
|
||||
mv kubectl /usr/local/bin/kubectl && \
|
||||
chmod 755 /usr/local/bin/kubectl && \
|
||||
curl -Ls https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.2.43.zip -o "awscliv2.zip" && \
|
||||
echo "9a8b3c4e7f72bbcc55e341dce3af42479f2730c225d6d265ee6f9162cfdebdfd awscliv2.zip" > awscliv2.txt && \
|
||||
curl -Ls https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.7.34.zip -o "awscliv2.zip" && \
|
||||
echo "daf9253f0071b5cfee9532bc5220bedd7a5d29d4e0f92b42b9e3e4c496341e88 awscliv2.zip" > awscliv2.txt && \
|
||||
sha256sum --quiet -c awscliv2.txt && \
|
||||
unzip -qq awscliv2.zip && \
|
||||
./aws/install && \
|
||||
|
|
|
@ -1,22 +1,44 @@
|
|||
#!/usr/bin/env bash
|
||||
set -Eeuxo pipefail
|
||||
set -Eeuo pipefail
|
||||
|
||||
function logg () {
|
||||
printf "##### $(date +'%Y-%m-%dT%H:%M:%SZ') # %-56.55s #####\n" "${1}"
|
||||
}
|
||||
|
||||
function error_exit () {
|
||||
echo "################################################################################"
|
||||
logg "${0} FAILED"
|
||||
logg "RUN_ID: ${RUN_ID}"
|
||||
logg "WORKLOAD: ${WORKLOAD}"
|
||||
logg "ENVIRONMENT IS:"
|
||||
env
|
||||
echo "################################################################################"
|
||||
}
|
||||
|
||||
trap error_exit ERR
|
||||
|
||||
namespace=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)
|
||||
POD_NUM=$(echo $POD_NAME | cut -d - -f3)
|
||||
KEY="ycsb_load_${POD_NUM}_of_${NUM_PODS}_complete"
|
||||
CLI=$(ls /var/dynamic-conf/bin/*/fdbcli | head -n1)
|
||||
|
||||
echo "WAITING FOR ALL PODS TO COME UP"
|
||||
while [[ $(kubectl get pods -n ${namespace} -l name=ycsb,run=${RUN_ID} --field-selector=status.phase=Running | grep -cv NAME) -lt ${NUM_PODS} ]]; do
|
||||
logg "WAITING FOR ${NUM_PODS} PODS TO COME UP IN ${namespace}"
|
||||
while [[ $(kubectl get pods -n "${namespace}" -l name=ycsb,run="${RUN_ID}" --field-selector=status.phase=Running | grep -cv NAME) -lt ${NUM_PODS} ]]; do
|
||||
sleep 1
|
||||
done
|
||||
echo "ALL PODS ARE UP"
|
||||
logg "${NUM_PODS} PODS ARE UP IN ${namespace}"
|
||||
|
||||
echo "RUNNING YCSB"
|
||||
./bin/ycsb.sh ${MODE} foundationdb -s -P workloads/${WORKLOAD} ${YCSB_ARGS}
|
||||
echo "YCSB FINISHED"
|
||||
logg "RUNNING YCSB ${WORKLOAD}"
|
||||
set -x
|
||||
./bin/ycsb.sh "${MODE}" foundationdb -s -P "workloads/${WORKLOAD}" "${YCSB_ARGS}"
|
||||
set +x
|
||||
logg "YCSB ${WORKLOAD} FINISHED"
|
||||
|
||||
echo "COPYING HISTOGRAMS TO S3"
|
||||
aws s3 sync --sse aws:kms --exclude "*" --include "histogram.*" /tmp s3://${BUCKET}/ycsb_histograms/${namespace}/${POD_NAME}
|
||||
echo "COPYING HISTOGRAMS TO S3 FINISHED"
|
||||
logg "COPYING HISTOGRAMS TO S3"
|
||||
set -x
|
||||
aws s3 sync --sse aws:kms --exclude "*" --include "histogram.*" /tmp "s3://${BUCKET}/ycsb_histograms/${namespace}/${POD_NAME}"
|
||||
set +x
|
||||
logg "COPYING HISTOGRAMS TO S3 FINISHED"
|
||||
|
||||
echo "################################################################################"
|
||||
logg "COMPLETED ${0}"
|
||||
logg "RUN_ID: ${RUN_ID}"
|
||||
logg "WORKLOAD: ${WORKLOAD}"
|
||||
echo "################################################################################"
|
||||
|
|
|
@ -226,7 +226,6 @@ if(WITH_PYTHON)
|
|||
add_fdb_test(TEST_FILES rare/CycleWithDeadHall.toml)
|
||||
add_fdb_test(TEST_FILES rare/DataDistributionMetrics.toml)
|
||||
add_fdb_test(TEST_FILES rare/FuzzTest.toml)
|
||||
add_fdb_test(TEST_FILES rare/GlobalTagThrottling.toml IGNORE)
|
||||
add_fdb_test(TEST_FILES rare/HighContentionPrefixAllocator.toml)
|
||||
add_fdb_test(TEST_FILES rare/InventoryTestHeavyWrites.toml)
|
||||
add_fdb_test(TEST_FILES rare/LargeApiCorrectness.toml)
|
||||
|
@ -240,6 +239,7 @@ if(WITH_PYTHON)
|
|||
add_fdb_test(TEST_FILES rare/RedwoodCorrectnessBTree.toml)
|
||||
add_fdb_test(TEST_FILES rare/RedwoodDeltaTree.toml)
|
||||
add_fdb_test(TEST_FILES rare/Throttling.toml)
|
||||
add_fdb_test(TEST_FILES rare/ThroughputQuota.toml)
|
||||
add_fdb_test(TEST_FILES rare/TransactionTagApiCorrectness.toml)
|
||||
add_fdb_test(TEST_FILES rare/TransactionTagSwizzledApiCorrectness.toml)
|
||||
add_fdb_test(TEST_FILES rare/WriteTagThrottling.toml)
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue