Merge branch 'master' of github.com:apple/foundationdb into RedwoodSuperpage

# Conflicts:
#	fdbserver/VersionedBTree.actor.cpp
This commit is contained in:
Steve Atherton 2021-10-25 00:55:52 -07:00
commit b75edbda31
106 changed files with 2793 additions and 1100 deletions

View File

@ -2,6 +2,7 @@
#include <fcntl.h>
#include <getopt.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -1065,7 +1066,8 @@ void* worker_thread(void* thread_args) {
int worker_id = ((thread_args_t*)thread_args)->process->worker_id;
int thread_id = ((thread_args_t*)thread_args)->thread_id;
mako_args_t* args = ((thread_args_t*)thread_args)->process->args;
FDBDatabase* database = ((thread_args_t*)thread_args)->process->database;
size_t database_index = ((thread_args_t*)thread_args)->database_index;
FDBDatabase* database = ((thread_args_t*)thread_args)->process->databases[database_index];
fdb_error_t err;
int rc;
FDBTransaction* transaction;
@ -1099,11 +1101,12 @@ void* worker_thread(void* thread_args) {
}
fprintf(debugme,
"DEBUG: worker_id:%d (%d) thread_id:%d (%d) (tid:%lld)\n",
"DEBUG: worker_id:%d (%d) thread_id:%d (%d) database_index:%d (tid:%lld)\n",
worker_id,
args->num_processes,
thread_id,
args->num_threads,
database_index,
(uint64_t)pthread_self());
if (args->tpsmax) {
@ -1231,6 +1234,7 @@ int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pi
fprintf(debugme, "DEBUG: worker %d started\n", worker_id);
/* Everything starts from here */
err = fdb_select_api_version(args->api_version);
if (err) {
fprintf(stderr, "ERROR: Failed at %s:%d (%s)\n", __FILE__, __LINE__, fdb_get_error(err));
@ -1291,6 +1295,17 @@ int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pi
}
}
if (args->client_threads_per_version > 0) {
err = fdb_network_set_option(
FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, (uint8_t*)&args->client_threads_per_version, sizeof(uint32_t));
if (err) {
fprintf(stderr,
"ERROR: fdb_network_set_option (FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION) (%d): %s\n",
(uint8_t*)&args->client_threads_per_version,
fdb_get_error(err));
}
}
/* Network thread must be setup before doing anything */
fprintf(debugme, "DEBUG: fdb_setup_network\n");
err = fdb_setup_network();
@ -1328,11 +1343,16 @@ int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pi
fdb_future_destroy(f);
#else /* >= 610 */
fdb_create_database(args->cluster_file, &process.database);
#endif
if (args->disable_ryw) {
fdb_database_set_option(process.database, FDB_DB_OPTION_SNAPSHOT_RYW_DISABLE, (uint8_t*)NULL, 0);
for (size_t i = 0; i < args->num_databases; i++) {
size_t cluster_index = args->num_fdb_clusters <= 1 ? 0 : i % args->num_fdb_clusters;
fdb_create_database(args->cluster_files[cluster_index], &process.databases[i]);
fprintf(debugme, "DEBUG: creating database at cluster %s\n", args->cluster_files[cluster_index]);
if (args->disable_ryw) {
fdb_database_set_option(process.databases[i], FDB_DB_OPTION_SNAPSHOT_RYW_DISABLE, (uint8_t*)NULL, 0);
}
}
#endif
fprintf(debugme, "DEBUG: creating %d worker threads\n", args->num_threads);
worker_threads = (pthread_t*)calloc(sizeof(pthread_t), args->num_threads);
if (!worker_threads) {
@ -1349,6 +1369,8 @@ int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pi
for (i = 0; i < args->num_threads; i++) {
thread_args[i].thread_id = i;
thread_args[i].database_index = i % args->num_databases;
for (int op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
thread_args[i].block[op] = (lat_block_t*)malloc(sizeof(lat_block_t));
@ -1388,7 +1410,10 @@ failExit:
free(thread_args);
/* clean up database and cluster */
fdb_database_destroy(process.database);
for (size_t i = 0; i < args->num_databases; i++) {
fdb_database_destroy(process.databases[i]);
}
#if FDB_API_VERSION < 610
fdb_cluster_destroy(cluster);
#endif
@ -1414,6 +1439,8 @@ int init_args(mako_args_t* args) {
if (!args)
return -1;
memset(args, 0, sizeof(mako_args_t)); /* zero-out everything */
args->num_fdb_clusters = 0;
args->num_databases = 1;
args->api_version = fdb_get_max_api_version();
args->json = 0;
args->num_processes = 1;
@ -1446,7 +1473,9 @@ int init_args(mako_args_t* args) {
for (i = 0; i < MAX_OP; i++) {
args->txnspec.ops[i][OP_COUNT] = 0;
}
args->client_threads_per_version = 0;
args->disable_ryw = 0;
args->json_output_path[0] = '\0';
return 0;
}
@ -1579,6 +1608,7 @@ void usage() {
printf("%-24s %s\n", "-v, --verbose", "Specify verbosity");
printf("%-24s %s\n", "-a, --api_version=API_VERSION", "Specify API_VERSION to use");
printf("%-24s %s\n", "-c, --cluster=FILE", "Specify FDB cluster file");
printf("%-24s %s\n", "-d, --num_databases=NUM_DATABASES", "Specify number of databases");
printf("%-24s %s\n", "-p, --procs=PROCS", "Specify number of worker processes");
printf("%-24s %s\n", "-t, --threads=THREADS", "Specify number of worker threads");
printf("%-24s %s\n", "-r, --rows=ROWS", "Specify number of records");
@ -1612,6 +1642,7 @@ void usage() {
printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers");
printf("%-24s %s\n", " --streaming", "Streaming mode: all (default), iterator, small, medium, large, serial");
printf("%-24s %s\n", " --disable_ryw", "Disable snapshot read-your-writes");
printf("%-24s %s\n", " --json_report=PATH", "Output stats to the specified json file (Default: mako.json)");
}
/* parse benchmark paramters */
@ -1620,50 +1651,53 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
int c;
int idx;
while (1) {
const char* short_options = "a:c:p:t:r:s:i:x:v:m:hjz";
static struct option long_options[] = { /* name, has_arg, flag, val */
{ "api_version", required_argument, NULL, 'a' },
{ "cluster", required_argument, NULL, 'c' },
{ "procs", required_argument, NULL, 'p' },
{ "threads", required_argument, NULL, 't' },
{ "rows", required_argument, NULL, 'r' },
{ "seconds", required_argument, NULL, 's' },
{ "iteration", required_argument, NULL, 'i' },
{ "keylen", required_argument, NULL, ARG_KEYLEN },
{ "vallen", required_argument, NULL, ARG_VALLEN },
{ "transaction", required_argument, NULL, 'x' },
{ "tps", required_argument, NULL, ARG_TPS },
{ "tpsmax", required_argument, NULL, ARG_TPSMAX },
{ "tpsmin", required_argument, NULL, ARG_TPSMIN },
{ "tpsinterval", required_argument, NULL, ARG_TPSINTERVAL },
{ "tpschange", required_argument, NULL, ARG_TPSCHANGE },
{ "sampling", required_argument, NULL, ARG_SAMPLING },
{ "verbose", required_argument, NULL, 'v' },
{ "mode", required_argument, NULL, 'm' },
{ "knobs", required_argument, NULL, ARG_KNOBS },
{ "loggroup", required_argument, NULL, ARG_LOGGROUP },
{ "tracepath", required_argument, NULL, ARG_TRACEPATH },
{ "trace_format", required_argument, NULL, ARG_TRACEFORMAT },
{ "streaming", required_argument, NULL, ARG_STREAMING_MODE },
{ "txntrace", required_argument, NULL, ARG_TXNTRACE },
/* no args */
{ "help", no_argument, NULL, 'h' },
{ "json", no_argument, NULL, 'j' },
{ "zipf", no_argument, NULL, 'z' },
{ "commitget", no_argument, NULL, ARG_COMMITGET },
{ "flatbuffers", no_argument, NULL, ARG_FLATBUFFERS },
{ "prefix_padding", no_argument, NULL, ARG_PREFIXPADDING },
{ "trace", no_argument, NULL, ARG_TRACE },
{ "txntagging", required_argument, NULL, ARG_TXNTAGGING },
{ "txntagging_prefix", required_argument, NULL, ARG_TXNTAGGINGPREFIX },
{ "version", no_argument, NULL, ARG_VERSION },
{ "disable_ryw", no_argument, NULL, ARG_DISABLE_RYW },
{ NULL, 0, NULL, 0 }
const char* short_options = "a:c:p:t:r:s:i:x:v:m:hz";
static struct option long_options[] = {
/* name, has_arg, flag, val */
{ "api_version", required_argument, NULL, 'a' },
{ "cluster", required_argument, NULL, 'c' },
{ "procs", required_argument, NULL, 'p' },
{ "threads", required_argument, NULL, 't' },
{ "rows", required_argument, NULL, 'r' },
{ "seconds", required_argument, NULL, 's' },
{ "iteration", required_argument, NULL, 'i' },
{ "keylen", required_argument, NULL, ARG_KEYLEN },
{ "vallen", required_argument, NULL, ARG_VALLEN },
{ "transaction", required_argument, NULL, 'x' },
{ "tps", required_argument, NULL, ARG_TPS },
{ "tpsmax", required_argument, NULL, ARG_TPSMAX },
{ "tpsmin", required_argument, NULL, ARG_TPSMIN },
{ "tpsinterval", required_argument, NULL, ARG_TPSINTERVAL },
{ "tpschange", required_argument, NULL, ARG_TPSCHANGE },
{ "sampling", required_argument, NULL, ARG_SAMPLING },
{ "verbose", required_argument, NULL, 'v' },
{ "mode", required_argument, NULL, 'm' },
{ "knobs", required_argument, NULL, ARG_KNOBS },
{ "loggroup", required_argument, NULL, ARG_LOGGROUP },
{ "tracepath", required_argument, NULL, ARG_TRACEPATH },
{ "trace_format", required_argument, NULL, ARG_TRACEFORMAT },
{ "streaming", required_argument, NULL, ARG_STREAMING_MODE },
{ "txntrace", required_argument, NULL, ARG_TXNTRACE },
/* no args */
{ "help", no_argument, NULL, 'h' },
{ "zipf", no_argument, NULL, 'z' },
{ "commitget", no_argument, NULL, ARG_COMMITGET },
{ "flatbuffers", no_argument, NULL, ARG_FLATBUFFERS },
{ "prefix_padding", no_argument, NULL, ARG_PREFIXPADDING },
{ "trace", no_argument, NULL, ARG_TRACE },
{ "txntagging", required_argument, NULL, ARG_TXNTAGGING },
{ "txntagging_prefix", required_argument, NULL, ARG_TXNTAGGINGPREFIX },
{ "version", no_argument, NULL, ARG_VERSION },
{ "client_threads_per_version", required_argument, NULL, ARG_CLIENT_THREADS_PER_VERSION },
{ "disable_ryw", no_argument, NULL, ARG_DISABLE_RYW },
{ "json_report", optional_argument, NULL, ARG_JSON_REPORT },
{ NULL, 0, NULL, 0 }
};
idx = 0;
c = getopt_long(argc, argv, short_options, long_options, &idx);
if (c < 0)
if (c < 0) {
break;
}
switch (c) {
case '?':
case 'h':
@ -1672,8 +1706,17 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
case 'a':
args->api_version = atoi(optarg);
break;
case 'c':
strcpy(args->cluster_file, optarg);
case 'c': {
const char delim[] = ",";
char* cluster_file = strtok(optarg, delim);
while (cluster_file != NULL) {
strcpy(args->cluster_files[args->num_fdb_clusters++], cluster_file);
cluster_file = strtok(NULL, delim);
}
break;
}
case 'd':
args->num_databases = atoi(optarg);
break;
case 'p':
args->num_processes = atoi(optarg);
@ -1812,9 +1855,22 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
}
memcpy(args->txntagging_prefix, optarg, strlen(optarg));
break;
case ARG_CLIENT_THREADS_PER_VERSION:
args->client_threads_per_version = atoi(optarg);
break;
case ARG_DISABLE_RYW:
args->disable_ryw = 1;
break;
case ARG_JSON_REPORT:
if (optarg == NULL && (argv[optind] == NULL || (argv[optind] != NULL && argv[optind][0] == '-'))) {
// if --report_json is the last option and no file is specified
// or --report_json is followed by another option
char default_file[] = "mako.json";
strncpy(args->json_output_path, default_file, strlen(default_file));
} else {
strncpy(args->json_output_path, optarg, strlen(optarg) + 1);
}
break;
}
}
@ -1841,6 +1897,41 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
return 0;
}
/* Return the human-readable display name for a mako operation code.
 * The returned pointer refers to a string literal and must not be freed.
 * Unrecognized codes yield the empty string. */
char* get_ops_name(int ops_code) {
	if (ops_code == OP_GETREADVERSION)
		return "GRV";
	if (ops_code == OP_GET)
		return "GET";
	if (ops_code == OP_GETRANGE)
		return "GETRANGE";
	if (ops_code == OP_SGET)
		return "SGET";
	if (ops_code == OP_SGETRANGE)
		return "SGETRANGE";
	if (ops_code == OP_UPDATE)
		return "UPDATE";
	if (ops_code == OP_INSERT)
		return "INSERT";
	if (ops_code == OP_INSERTRANGE)
		return "INSERTRANGE";
	if (ops_code == OP_CLEAR)
		return "CLEAR";
	if (ops_code == OP_SETCLEAR)
		return "SETCLEAR";
	if (ops_code == OP_CLEARRANGE)
		return "CLEARRANGE";
	if (ops_code == OP_SETCLEARRANGE)
		return "SETCLEARRANGE";
	if (ops_code == OP_COMMIT)
		return "COMMIT";
	if (ops_code == OP_TRANSACTION)
		return "TRANSACTION";
	/* unknown operation: caller prints nothing for it */
	return "";
}
int validate_args(mako_args_t* args) {
if (args->mode == MODE_INVALID) {
fprintf(stderr, "ERROR: --mode has to be set\n");
@ -1858,6 +1949,28 @@ int validate_args(mako_args_t* args) {
fprintf(stderr, "ERROR: --vallen must be a positive integer\n");
return -1;
}
if (args->num_fdb_clusters > NUM_CLUSTERS_MAX) {
fprintf(stderr, "ERROR: Mako is not supported to do work to more than %d clusters\n", NUM_CLUSTERS_MAX);
return -1;
}
if (args->num_databases > NUM_DATABASES_MAX) {
fprintf(stderr, "ERROR: Mako is not supported to do work to more than %d databases\n", NUM_DATABASES_MAX);
return -1;
}
if (args->num_databases < args->num_fdb_clusters) {
fprintf(stderr,
"ERROR: --num_databases (%d) must be >= number of clusters(%d)\n",
args->num_databases,
args->num_fdb_clusters);
return -1;
}
if (args->num_threads < args->num_databases) {
fprintf(stderr,
"ERROR: --threads (%d) must be >= number of databases (%d)\n",
args->num_threads,
args->num_databases);
return -1;
}
if (args->key_length < 4 /* "mako" */ + digits(args->rows)) {
fprintf(stderr,
"ERROR: --keylen must be larger than %d to store \"mako\" prefix "
@ -1888,7 +2001,7 @@ int validate_args(mako_args_t* args) {
#define STATS_TITLE_WIDTH 12
#define STATS_FIELD_WIDTH 12
void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, struct timespec* prev) {
void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, struct timespec* prev, FILE* fp) {
int i, j;
int op;
int print_err;
@ -1901,7 +2014,7 @@ void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, s
uint64_t totalxacts = 0;
static uint64_t conflicts_prev = 0;
uint64_t conflicts = 0;
double durationns = (now->tv_sec - prev->tv_sec) * 1000000000.0 + (now->tv_nsec - prev->tv_nsec);
double duration_nsec = (now->tv_sec - prev->tv_sec) * 1000000000.0 + (now->tv_nsec - prev->tv_nsec);
for (i = 0; i < args->num_processes; i++) {
for (j = 0; j < args->num_threads; j++) {
@ -1913,10 +2026,18 @@ void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, s
}
}
}
if (fp) {
fwrite("{", 1, 1, fp);
}
printf("%" STR(STATS_TITLE_WIDTH) "s ", "OPS");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0) {
printf("%" STR(STATS_FIELD_WIDTH) "lld ", ops_total[op] - ops_total_prev[op]);
uint64_t ops_total_diff = ops_total[op] - ops_total_prev[op];
printf("%" STR(STATS_FIELD_WIDTH) "lld ", ops_total_diff);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), ops_total_diff);
}
errors_diff[op] = errors_total[op] - errors_total_prev[op];
print_err = (errors_diff[op] > 0);
ops_total_prev[op] = ops_total[op];
@ -1924,11 +2045,19 @@ void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, s
}
}
/* TPS */
printf("%" STR(STATS_FIELD_WIDTH) ".2f ", (totalxacts - totalxacts_prev) * 1000000000.0 / durationns);
double tps = (totalxacts - totalxacts_prev) * 1000000000.0 / duration_nsec;
printf("%" STR(STATS_FIELD_WIDTH) ".2f ", tps);
if (fp) {
fprintf(fp, "\"tps\": %.2f,", tps);
}
totalxacts_prev = totalxacts;
/* Conflicts */
printf("%" STR(STATS_FIELD_WIDTH) ".2f\n", (conflicts - conflicts_prev) * 1000000000.0 / durationns);
double conflicts_diff = (conflicts - conflicts_prev) * 1000000000.0 / duration_nsec;
printf("%" STR(STATS_FIELD_WIDTH) ".2f\n", conflicts_diff);
if (fp) {
fprintf(fp, "\"conflictsPerSec\": %.2f},", conflicts_diff);
}
conflicts_prev = conflicts;
if (print_err) {
@ -1936,10 +2065,14 @@ void print_stats(mako_args_t* args, mako_stats_t* stats, struct timespec* now, s
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0) {
printf("%" STR(STATS_FIELD_WIDTH) "lld ", errors_diff[op]);
if (fp) {
fprintf(fp, "\"errors\": %.2f", conflicts_diff);
}
}
}
printf("\n");
}
return;
}
@ -1953,44 +2086,7 @@ void print_stats_header(mako_args_t* args, bool show_commit, bool is_first_heade
printf(" ");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0) {
switch (op) {
case OP_GETREADVERSION:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "GRV");
break;
case OP_GET:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "GET");
break;
case OP_GETRANGE:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "GETRANGE");
break;
case OP_SGET:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "SGET");
break;
case OP_SGETRANGE:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "SGETRANGE");
break;
case OP_UPDATE:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "UPDATE");
break;
case OP_INSERT:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "INSERT");
break;
case OP_INSERTRANGE:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "INSERTRANGE");
break;
case OP_CLEAR:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "CLEAR");
break;
case OP_SETCLEAR:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "SETCLEAR");
break;
case OP_CLEARRANGE:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "CLEARRANGE");
break;
case OP_SETCLEARRANGE:
printf("%" STR(STATS_FIELD_WIDTH) "s ", "SETCLRRANGE");
break;
}
printf("%" STR(STATS_FIELD_WIDTH) "s ", get_ops_name(op));
}
}
@ -2043,7 +2139,8 @@ void print_report(mako_args_t* args,
mako_stats_t* stats,
struct timespec* timer_now,
struct timespec* timer_start,
pid_t* pid_main) {
pid_t* pid_main,
FILE* fp) {
int i, j, k, op, index;
uint64_t totalxacts = 0;
uint64_t conflicts = 0;
@ -2055,7 +2152,7 @@ void print_report(mako_args_t* args,
uint64_t lat_samples[MAX_OP] = { 0 };
uint64_t lat_max[MAX_OP] = { 0 };
uint64_t durationns =
uint64_t duration_nsec =
(timer_now->tv_sec - timer_start->tv_sec) * 1000000000 + (timer_now->tv_nsec - timer_start->tv_nsec);
for (op = 0; op < MAX_OP; op++) {
@ -2089,7 +2186,8 @@ void print_report(mako_args_t* args,
}
/* overall stats */
printf("\n====== Total Duration %6.3f sec ======\n\n", (double)durationns / 1000000000);
double total_duration = duration_nsec * 1.0 / 1000000000;
printf("\n====== Total Duration %6.3f sec ======\n\n", total_duration);
printf("Total Processes: %8d\n", args->num_processes);
printf("Total Threads: %8d\n", args->num_threads);
if (args->tpsmax == args->tpsmin)
@ -2114,32 +2212,62 @@ void print_report(mako_args_t* args,
printf("Total Xacts: %8lld\n", totalxacts);
printf("Total Conflicts: %8lld\n", conflicts);
printf("Total Errors: %8lld\n", totalerrors);
printf("Overall TPS: %8lld\n\n", totalxacts * 1000000000 / durationns);
printf("Overall TPS: %8lld\n\n", totalxacts * 1000000000 / duration_nsec);
if (fp) {
fprintf(fp, "\"results\": {");
fprintf(fp, "\"totalDuration\": %6.3f,", total_duration);
fprintf(fp, "\"totalProcesses\": %d,", args->num_processes);
fprintf(fp, "\"totalThreads\": %d,", args->num_threads);
fprintf(fp, "\"targetTPS\": %d,", args->tpsmax);
fprintf(fp, "\"totalXacts\": %lld,", totalxacts);
fprintf(fp, "\"totalConflicts\": %lld,", conflicts);
fprintf(fp, "\"totalErrors\": %lld,", totalerrors);
fprintf(fp, "\"overallTPS\": %lld,", totalxacts * 1000000000 / duration_nsec);
}
/* per-op stats */
print_stats_header(args, true, true, false);
/* OPS */
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "Total OPS");
if (fp) {
fprintf(fp, "\"totalOps\": {");
}
for (op = 0; op < MAX_OP; op++) {
if ((args->txnspec.ops[op][OP_COUNT] > 0 && op != OP_TRANSACTION) || op == OP_COMMIT) {
printf("%" STR(STATS_FIELD_WIDTH) "lld ", ops_total[op]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), ops_total[op]);
}
}
}
/* TPS */
printf("%" STR(STATS_FIELD_WIDTH) ".2f ", totalxacts * 1000000000.0 / durationns);
double tps = totalxacts * 1000000000.0 / duration_nsec;
printf("%" STR(STATS_FIELD_WIDTH) ".2f ", tps);
/* Conflicts */
printf("%" STR(STATS_FIELD_WIDTH) ".2f\n", conflicts * 1000000000.0 / durationns);
double conflicts_rate = conflicts * 1000000000.0 / duration_nsec;
printf("%" STR(STATS_FIELD_WIDTH) ".2f\n", conflicts_rate);
if (fp) {
fprintf(fp, "}, \"tps\": %.2f, \"conflictsPerSec\": %.2f, \"errors\": {", tps, conflicts_rate);
}
/* Errors */
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "Errors");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 && op != OP_TRANSACTION) {
printf("%" STR(STATS_FIELD_WIDTH) "lld ", errors_total[op]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), errors_total[op]);
}
}
}
if (fp) {
fprintf(fp, "}, \"numSamples\": {");
}
printf("\n\n");
printf("%s", "Latency (us)");
@ -2154,11 +2282,17 @@ void print_report(mako_args_t* args,
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
}
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), lat_samples[op]);
}
}
}
printf("\n");
/* Min Latency */
if (fp) {
fprintf(fp, "}, \"minLatency\": {");
}
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "Min");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
@ -2166,17 +2300,26 @@ void print_report(mako_args_t* args,
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
} else {
printf("%" STR(STATS_FIELD_WIDTH) "lld ", lat_min[op]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), lat_min[op]);
}
}
}
}
printf("\n");
/* Avg Latency */
if (fp) {
fprintf(fp, "}, \"avgLatency\": {");
}
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "Avg");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
if (lat_total[op]) {
printf("%" STR(STATS_FIELD_WIDTH) "lld ", lat_total[op] / lat_samples[op]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), lat_total[op] / lat_samples[op]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
}
@ -2185,6 +2328,9 @@ void print_report(mako_args_t* args,
printf("\n");
/* Max Latency */
if (fp) {
fprintf(fp, "}, \"maxLatency\": {");
}
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "Max");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
@ -2192,6 +2338,9 @@ void print_report(mako_args_t* args,
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
} else {
printf("%" STR(STATS_FIELD_WIDTH) "lld ", lat_max[op]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), lat_max[op]);
}
}
}
}
@ -2202,6 +2351,9 @@ void print_report(mako_args_t* args,
int point_99_9pct, point_99pct, point_95pct;
/* Median Latency */
if (fp) {
fprintf(fp, "}, \"medianLatency\": {");
}
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "Median");
int num_points[MAX_OP] = { 0 };
for (op = 0; op < MAX_OP; op++) {
@ -2238,6 +2390,9 @@ void print_report(mako_args_t* args,
median = (dataPoints[op][num_points[op] / 2] + dataPoints[op][num_points[op] / 2 - 1]) >> 1;
}
printf("%" STR(STATS_FIELD_WIDTH) "lld ", median);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), median);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
}
@ -2246,6 +2401,9 @@ void print_report(mako_args_t* args,
printf("\n");
/* 95%ile Latency */
if (fp) {
fprintf(fp, "}, \"p95Latency\": {");
}
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "95.0 pctile");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
@ -2256,6 +2414,9 @@ void print_report(mako_args_t* args,
if (lat_total[op]) {
point_95pct = ((float)(num_points[op]) * 0.95) - 1;
printf("%" STR(STATS_FIELD_WIDTH) "lld ", dataPoints[op][point_95pct]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), dataPoints[op][point_95pct]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
}
@ -2264,6 +2425,9 @@ void print_report(mako_args_t* args,
printf("\n");
/* 99%ile Latency */
if (fp) {
fprintf(fp, "}, \"p99Latency\": {");
}
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "99.0 pctile");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
@ -2274,6 +2438,9 @@ void print_report(mako_args_t* args,
if (lat_total[op]) {
point_99pct = ((float)(num_points[op]) * 0.99) - 1;
printf("%" STR(STATS_FIELD_WIDTH) "lld ", dataPoints[op][point_99pct]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), dataPoints[op][point_99pct]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
}
@ -2282,6 +2449,9 @@ void print_report(mako_args_t* args,
printf("\n");
/* 99.9%ile Latency */
if (fp) {
fprintf(fp, "}, \"p99.9Latency\": {");
}
printf("%-" STR(STATS_TITLE_WIDTH) "s ", "99.9 pctile");
for (op = 0; op < MAX_OP; op++) {
if (args->txnspec.ops[op][OP_COUNT] > 0 || op == OP_TRANSACTION || op == OP_COMMIT) {
@ -2292,12 +2462,18 @@ void print_report(mako_args_t* args,
if (lat_total[op]) {
point_99_9pct = ((float)(num_points[op]) * 0.999) - 1;
printf("%" STR(STATS_FIELD_WIDTH) "lld ", dataPoints[op][point_99_9pct]);
if (fp) {
fprintf(fp, "\"%s\": %lld,", get_ops_name(op), dataPoints[op][point_99_9pct]);
}
} else {
printf("%" STR(STATS_FIELD_WIDTH) "s ", "N/A");
}
}
}
printf("\n");
if (fp) {
fprintf(fp, "}}");
}
char command_remove[NAME_MAX] = { '\0' };
sprintf(command_remove, "rm -rf %s%d", TEMP_DATA_STORE, *pid_main);
@ -2328,6 +2504,44 @@ int stats_process_main(mako_args_t* args,
if (args->verbose >= VERBOSE_DEFAULT)
print_stats_header(args, false, true, false);
FILE* fp = NULL;
if (args->json_output_path[0] != '\0') {
fp = fopen(args->json_output_path, "w");
fprintf(fp, "{\"makoArgs\": {");
fprintf(fp, "\"api_version\": %d,", args->api_version);
fprintf(fp, "\"json\": %d,", args->json);
fprintf(fp, "\"num_processes\": %d,", args->num_processes);
fprintf(fp, "\"num_threads\": %d,", args->num_threads);
fprintf(fp, "\"mode\": %d,", args->mode);
fprintf(fp, "\"rows\": %d,", args->rows);
fprintf(fp, "\"seconds\": %d,", args->seconds);
fprintf(fp, "\"iteration\": %d,", args->iteration);
fprintf(fp, "\"tpsmax\": %d,", args->tpsmax);
fprintf(fp, "\"tpsmin\": %d,", args->tpsmin);
fprintf(fp, "\"tpsinterval\": %d,", args->tpsinterval);
fprintf(fp, "\"tpschange\": %d,", args->tpschange);
fprintf(fp, "\"sampling\": %d,", args->sampling);
fprintf(fp, "\"key_length\": %d,", args->key_length);
fprintf(fp, "\"value_length\": %d,", args->value_length);
fprintf(fp, "\"commit_get\": %d,", args->commit_get);
fprintf(fp, "\"verbose\": %d,", args->verbose);
fprintf(fp, "\"cluster_file\": \"%s\",", args->cluster_files);
fprintf(fp, "\"log_group\": \"%s\",", args->log_group);
fprintf(fp, "\"prefixpadding\": %d,", args->prefixpadding);
fprintf(fp, "\"trace\": %d,", args->trace);
fprintf(fp, "\"tracepath\": \"%s\",", args->tracepath);
fprintf(fp, "\"traceformat\": %d,", args->traceformat);
fprintf(fp, "\"knobs\": \"%s\",", args->knobs);
fprintf(fp, "\"flatbuffers\": %d,", args->flatbuffers);
fprintf(fp, "\"txntrace\": %d,", args->txntrace);
fprintf(fp, "\"txntagging\": %d,", args->txntagging);
fprintf(fp, "\"txntagging_prefix\": \"%s\",", args->txntagging_prefix);
fprintf(fp, "\"streaming_mode\": %d,", args->streaming_mode);
fprintf(fp, "\"disable_ryw\": %d,", args->disable_ryw);
fprintf(fp, "\"json_output_path\": \"%s\",", args->json_output_path);
fprintf(fp, "},\"samples\": [");
}
clock_gettime(CLOCK_MONOTONIC_COARSE, &timer_start);
timer_prev.tv_sec = timer_start.tv_sec;
timer_prev.tv_nsec = timer_start.tv_nsec;
@ -2369,19 +2583,28 @@ int stats_process_main(mako_args_t* args,
}
if (args->verbose >= VERBOSE_DEFAULT)
print_stats(args, stats, &timer_now, &timer_prev);
print_stats(args, stats, &timer_now, &timer_prev, fp);
timer_prev.tv_sec = timer_now.tv_sec;
timer_prev.tv_nsec = timer_now.tv_nsec;
}
}
if (fp) {
fprintf(fp, "],");
}
/* print report */
if (args->verbose >= VERBOSE_DEFAULT) {
clock_gettime(CLOCK_MONOTONIC_COARSE, &timer_now);
while (*stopcount < args->num_threads * args->num_processes) {
usleep(10000); /* 10ms */
}
print_report(args, stats, &timer_now, &timer_start, pid_main);
print_report(args, stats, &timer_now, &timer_start, pid_main, fp);
}
if (fp) {
fprintf(fp, "}");
fclose(fp);
}
return 0;

View File

@ -81,7 +81,9 @@ enum Arguments {
ARG_TXNTAGGING,
ARG_TXNTAGGINGPREFIX,
ARG_STREAMING_MODE,
ARG_DISABLE_RYW
ARG_DISABLE_RYW,
ARG_CLIENT_THREADS_PER_VERSION,
ARG_JSON_REPORT
};
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
@ -103,6 +105,8 @@ typedef struct {
#define LOGGROUP_MAX 256
#define KNOB_MAX 256
#define TAGPREFIXLENGTH_MAX 8
#define NUM_CLUSTERS_MAX 3
#define NUM_DATABASES_MAX 10
/* benchmark parameters */
typedef struct {
@ -125,7 +129,9 @@ typedef struct {
int commit_get;
int verbose;
mako_txnspec_t txnspec;
char cluster_file[PATH_MAX];
char cluster_files[NUM_CLUSTERS_MAX][PATH_MAX];
int num_fdb_clusters;
int num_databases;
char log_group[LOGGROUP_MAX];
int prefixpadding;
int trace;
@ -137,7 +143,9 @@ typedef struct {
int txntagging;
char txntagging_prefix[TAGPREFIXLENGTH_MAX];
FDBStreamingMode streaming_mode;
uint32_t client_threads_per_version;
int disable_ryw;
char json_output_path[PATH_MAX];
} mako_args_t;
/* shared memory */
@ -173,14 +181,15 @@ typedef struct {
typedef struct {
int worker_id;
pid_t parent_id;
FDBDatabase* database;
mako_args_t* args;
mako_shmhdr_t* shm;
FDBDatabase* databases[NUM_DATABASES_MAX];
} process_info_t;
/* args for threads */
typedef struct {
int thread_id;
int database_index; // index of the database to do work to
int elem_size[MAX_OP]; /* stores the multiple of LAT_BLOCK_SIZE to check the memory allocation of each operation */
bool is_memory_allocated[MAX_OP]; /* flag specified for each operation, whether the memory was allocated to that
specific operation */

View File

@ -35,7 +35,7 @@ else()
BUILD_BYPRODUCTS "${JEMALLOC_DIR}/include/jemalloc/jemalloc.h"
"${JEMALLOC_DIR}/lib/libjemalloc.a"
"${JEMALLOC_DIR}/lib/libjemalloc_pic.a"
CONFIGURE_COMMAND ./configure --prefix=${JEMALLOC_DIR} --enable-static --disable-cxx
CONFIGURE_COMMAND ./configure --prefix=${JEMALLOC_DIR} --enable-static --disable-cxx --enable-prof
BUILD_IN_SOURCE ON
BUILD_COMMAND make
INSTALL_DIR "${JEMALLOC_DIR}"

View File

@ -164,6 +164,10 @@ If the ``failed`` keyword is specified, the address is marked as failed and adde
For more information on excluding servers, see :ref:`removing-machines-from-a-cluster`.
Warning about potential data loss with the ``failed`` option: if a server is the last one in some team(s), excluding it with ``failed`` will lose all data in those team(s); hence, ``failed`` should only be set when the server(s) have permanently failed.
In the case that all servers of a team have permanently failed, excluding all of them will clean up the corresponding keyrange and fix the invalid metadata. The keyrange will then be assigned to a new team as an empty shard.
exit
----

View File

@ -2,6 +2,13 @@
Release Notes
#############
6.3.22
======
* Added histograms to client GRV batcher. `(PR #5760) <https://github.com/apple/foundationdb/pull/5760>`_
* Added FastAlloc memory utilization trace. `(PR #5759) <https://github.com/apple/foundationdb/pull/5759>`_
* Added locality cache size to TransactionMetrics. `(PR #5771) <https://github.com/apple/foundationdb/pull/5771>`_
* Added a new feature that allows FDB to failover to remote DC when the primary is experiencing massive grey failure. This feature is turned off by default. `(PR #5774) <https://github.com/apple/foundationdb/pull/5774>`_
6.3.21
======
* Added a ThreadID field to all trace events for the purpose of multi-threaded client debugging. `(PR #5665) <https://github.com/apple/foundationdb/pull/5665>`_

View File

@ -95,3 +95,13 @@ Tracing can be enabled or disabled for individual transactions. The special key
space exposes an API to set a custom trace ID for a transaction, or to disable
tracing for the transaction. See the special key space :ref:`tracing module
documentation <special-key-space-tracing-module>` to learn more.
^^^^^^^^^^^^^^
Trace sampling
^^^^^^^^^^^^^^
By default, all traces are recorded. If tracing is producing too much data,
adjust the trace sample rate with the ``TRACING_SAMPLE_RATE`` knob. Set the
knob to 0.0 to record no traces, to 1.0 to record all traces, or somewhere in
the middle. Traces are sampled as a unit. All individual spans in the trace
will be included in the sample.

View File

@ -37,6 +37,7 @@
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/Status.h"
#include "fdbclient/BackupContainer.h"
#include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/KeyBackedTypes.h"
#include "fdbclient/IKnobCollection.h"
#include "fdbclient/RunTransaction.actor.h"
@ -3095,7 +3096,7 @@ Optional<Database> connectToCluster(std::string const& clusterFile,
} catch (Error& e) {
if (!quiet) {
fprintf(stderr, "ERROR: %s\n", e.what());
fprintf(stderr, "ERROR: Unable to connect to cluster from `%s'\n", ccf->getFilename().c_str());
fprintf(stderr, "ERROR: Unable to connect to cluster from `%s'\n", ccf->getLocation().c_str());
}
return db;
}

View File

@ -393,5 +393,11 @@ CommandFactory excludeFactory(
"command returns \nimmediately without checking if the exclusions have completed successfully.\n"
"If 'FORCE' is set, the command does not perform safety checks before excluding.\n"
"If 'failed' is set, the transaction log queue is dropped pre-emptively before waiting\n"
"for data movement to finish and the server cannot be included again."));
"for data movement to finish and the server cannot be included again."
"\n\nWARNING of potential data loss:\n"
"If a to-be-excluded server is the last server of some team(s), and 'failed' is set, the data in the team(s) "
"will be lost. 'failed' should be set only if the server(s) have permanently failed. "
"In the case all servers of a team have failed permanently and data loss has already occurred, excluding all the "
"servers will clean up the corresponding keyrange, and fix the invalid metadata. The keyrange will be "
"assigned to a new team as an empty shard."));
} // namespace fdb_cli

View File

@ -19,6 +19,7 @@
*/
#include "boost/lexical_cast.hpp"
#include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/IClientApi.h"
@ -1034,8 +1035,8 @@ ACTOR Future<bool> exclude(Database db,
locality.c_str());
}
ClusterConnectionString ccs = wait(ccf->getStoredConnectionString());
bool foundCoordinator = false;
auto ccs = ClusterConnectionFile(ccf->getFilename()).getConnectionString();
for (const auto& c : ccs.coordinators()) {
if (std::count(exclusionVector.begin(), exclusionVector.end(), AddressExclusion(c.ip, c.port)) ||
std::count(exclusionVector.begin(), exclusionVector.end(), AddressExclusion(c.ip))) {
@ -1584,12 +1585,12 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
try {
localDb = Database::createDatabase(ccf, opt.api_version, IsInternal::False);
if (!opt.exec.present()) {
printf("Using cluster file `%s'.\n", ccf->getFilename().c_str());
printf("Using cluster file `%s'.\n", ccf->getLocation().c_str());
}
db = API->createDatabase(opt.clusterFile.c_str());
} catch (Error& e) {
fprintf(stderr, "ERROR: %s (%d)\n", e.what(), e.code());
printf("Unable to connect to cluster from `%s'\n", ccf->getFilename().c_str());
printf("Unable to connect to cluster from `%s'\n", ccf->getLocation().c_str());
return 1;
}
@ -1600,7 +1601,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ClusterFile", ccf->getFilename().c_str())
.detail("ClusterFile", ccf->toString())
.detail("ConnectionString", ccf->getConnectionString().toString())
.setMaxFieldLength(10000)
.detail("CommandLine", opt.commandLine)

View File

@ -242,6 +242,9 @@ void sample(LineageReference* lineagePtr) {
if (!lineagePtr->isValid()) {
return;
}
if (!lineagePtr->isAllocated()) {
lineagePtr->allocate();
}
(*lineagePtr)->modify(&NameLineage::actorName) = lineagePtr->actorName();
boost::asio::post(ActorLineageProfiler::instance().context(),
[lineage = LineageReference::addRef(lineagePtr->getPtr())]() {

View File

@ -26,6 +26,12 @@ set(FDBCLIENT_SRCS
ClientKnobs.h
ClientLogEvents.h
ClientWorkerInterface.h
ClusterConnectionFile.actor.cpp
ClusterConnectionFile.h
ClusterConnectionKey.actor.cpp
ClusterConnectionKey.actor.h
ClusterConnectionMemoryRecord.actor.cpp
ClusterConnectionMemoryRecord.h
ClusterInterface.h
CommitProxyInterface.h
CommitTransaction.h

View File

@ -0,0 +1,178 @@
/*
* ClusterConnectionFile.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/MonitorLeader.h"
#include "flow/actorcompiler.h" // has to be last include
// Loads and parses the file at 'filename', throwing errors if the file cannot be read or the format is invalid.
// Throws no_cluster_file_found if the file does not exist; parsing the contents may throw
// connection_string_invalid. A record constructed this way does not need to be persisted.
ClusterConnectionFile::ClusterConnectionFile(std::string const& filename)
: IClusterConnectionRecord(ConnectionStringNeedsPersisted::False) {
if (!fileExists(filename)) {
throw no_cluster_file_found();
}
// Reads at most MAX_CLUSTER_FILE_BYTES; the ClusterConnectionString constructor validates the format.
cs = ClusterConnectionString(readFileBytes(filename, MAX_CLUSTER_FILE_BYTES));
this->filename = filename;
}
// Creates a cluster file record with a given connection string for the specified file. The string is not
// written to disk here; the record is marked as needing persistence and is written when persist() runs.
ClusterConnectionFile::ClusterConnectionFile(std::string const& filename, ClusterConnectionString const& contents)
: IClusterConnectionRecord(ConnectionStringNeedsPersisted::True) {
this->filename = filename;
cs = contents;
}
// Returns the connection string currently held in this object. This may not match the string on disk if it hasn't
// been persisted or if the file has been modified externally.
ClusterConnectionString const& ClusterConnectionFile::getConnectionString() const {
return cs;
}
// Sets the connection string held by this object and persists it to the file.
// Note: success() discards the bool result of persist(), so a failed write is reported only via trace
// events; the returned future simply signals completion of the attempt.
Future<Void> ClusterConnectionFile::setConnectionString(ClusterConnectionString const& conn) {
ASSERT(filename.size());
cs = conn;
return success(persist());
}
// Gets the connection string currently stored in the file by re-reading and re-parsing it from disk.
// Returns an error future if the file cannot be read or parsed.
Future<ClusterConnectionString> ClusterConnectionFile::getStoredConnectionString() {
try {
return ClusterConnectionFile(filename).cs;
} catch (Error& e) {
return e;
}
}
// Checks whether the connection string in the file matches the connection string stored in memory. The cluster
// string stored in the file is returned via the reference parameter fileConnectionString.
Future<bool> ClusterConnectionFile::upToDate(ClusterConnectionString& fileConnectionString) {
try {
// If the record still needs to be persisted, the cluster file hasn't been created yet, so there is
// nothing on disk to compare against and the in-memory string is authoritative.
if (needsToBePersisted())
return true;
// Re-read and re-parse the file to get its current on-disk contents.
ClusterConnectionFile temp(filename);
fileConnectionString = temp.getConnectionString();
return fileConnectionString.toString() == cs.toString();
} catch (Error& e) {
TraceEvent(SevWarnAlways, "ClusterFileError").error(e).detail("Filename", filename);
return false; // Swallow the error and report that the file is out of date
}
}
// Returns the filesystem path of the cluster file.
std::string ClusterConnectionFile::getLocation() const {
return filename;
}
// Creates a copy of this object with a modified connection string but that isn't persisted.
// The copy is constructed with ConnectionStringNeedsPersisted::True, so it will be written to the same
// file if persist() is later invoked on it.
Reference<IClusterConnectionRecord> ClusterConnectionFile::makeIntermediateRecord(
ClusterConnectionString const& connectionString) const {
return makeReference<ClusterConnectionFile>(filename, connectionString);
}
// Returns a string representation of this cluster connection record. This will include the type of record and the
// filename of the cluster file.
std::string ClusterConnectionFile::toString() const {
// This is a fairly naive attempt to generate a URI-like string. It will not account for characters like spaces, it
// may use backslashes in windows paths, etc.
// SOMEDAY: we should encode this string as a proper URI.
return "file://" + filename;
}
// returns <resolved name, was default file>
std::pair<std::string, bool> ClusterConnectionFile::lookupClusterFileName(std::string const& filename) {
if (filename.length())
return std::make_pair(filename, false);
std::string f;
bool isDefaultFile = true;
if (platform::getEnvironmentVar(CLUSTER_FILE_ENV_VAR_NAME, f)) {
// If this is set but points to a file that does not
// exist, we will not fallback to any other methods
isDefaultFile = false;
} else if (fileExists("fdb.cluster"))
f = "fdb.cluster";
else
f = platform::getDefaultClusterFilePath();
return std::make_pair(f, isDefaultFile);
}
// Gets a human readable error message describing the error returned from the constructor.
// resolvedClusterFile is the pair produced by lookupClusterFileName: <resolved name, was default file>.
std::string ClusterConnectionFile::getErrorString(std::pair<std::string, bool> const& resolvedClusterFile,
Error const& e) {
bool isDefault = resolvedClusterFile.second;
if (e.code() == error_code_connection_string_invalid) {
return format("Invalid cluster file `%s': %d %s", resolvedClusterFile.first.c_str(), e.code(), e.what());
} else if (e.code() == error_code_no_cluster_file_found) {
// For default lookups, mention every location that was tried so the user knows where a file may be placed.
if (isDefault)
return format("Unable to read cluster file `./fdb.cluster' or `%s' and %s unset: %d %s",
platform::getDefaultClusterFilePath().c_str(),
CLUSTER_FILE_ENV_VAR_NAME,
e.code(),
e.what());
else
return format(
"Unable to read cluster file `%s': %d %s", resolvedClusterFile.first.c_str(), e.code(), e.what());
} else {
return format(
"Unexpected error loading cluster file `%s': %d %s", resolvedClusterFile.first.c_str(), e.code(), e.what());
}
}
// Writes the connection string to the cluster file.
// Returns true on success; returns false (after tracing) if no filename is set, the write fails, or another
// process concurrently replaced the file with different contents.
Future<bool> ClusterConnectionFile::persist() {
// Mark the record as persisted up front; failures below are reported via the return value and trace events.
setPersisted();
if (filename.size()) {
try {
atomicReplace(filename,
"# DO NOT EDIT!\n# This file is auto-generated, it is not to be edited by hand\n" +
cs.toString().append("\n"));
Future<bool> isUpToDate = IClusterConnectionRecord::upToDate();
// The implementation of upToDate in this class is synchronous
ASSERT(isUpToDate.isReady());
if (!isUpToDate.get()) {
// This should only happen in rare scenarios where multiple processes are updating the same file to
// different values simultaneously. In that case, we don't have any guarantees about which file will
// ultimately be written
TraceEvent(SevWarnAlways, "ClusterFileChangedAfterReplace")
.detail("Filename", filename)
.detail("ConnectionString", cs.toString());
return false;
}
return true;
} catch (Error& e) {
TraceEvent(SevWarnAlways, "UnableToChangeConnectionFile")
.error(e)
.detail("Filename", filename)
.detail("ConnectionString", cs.toString());
}
}
return false;
}

View File

@ -0,0 +1,81 @@
/*
* ClusterConnectionFile.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef FDBCLIENT_CLUSTERCONNECTIONFILE_H
#define FDBCLIENT_CLUSTERCONNECTIONFILE_H
#include "fdbclient/CoordinationInterface.h"
#include "flow/actorcompiler.h" // has to be last include
// An implementation of IClusterConnectionRecord backed by a file on the local filesystem.
class ClusterConnectionFile : public IClusterConnectionRecord, ReferenceCounted<ClusterConnectionFile>, NonCopyable {
public:
// Loads and parses the file at 'filename', throwing errors (e.g. no_cluster_file_found) if the file cannot be
// read or the format is invalid.
explicit ClusterConnectionFile(std::string const& filename);
// Creates a cluster file record with a given connection string to be saved to the specified file. The file is
// only written when the record is persisted.
explicit ClusterConnectionFile(std::string const& filename, ClusterConnectionString const& contents);
// Returns the connection string currently held in this object. This may not match the string on disk if it hasn't
// been persisted or if the file has been modified externally.
ClusterConnectionString const& getConnectionString() const override;
// Sets the connection string held by this object and persists it.
Future<Void> setConnectionString(ClusterConnectionString const&) override;
// Gets the connection string stored in the file by re-reading it from disk.
Future<ClusterConnectionString> getStoredConnectionString() override;
// Checks whether the connection string in the file matches the connection string stored in memory. The cluster
// string stored in the file is returned via the reference parameter fileConnectionString.
Future<bool> upToDate(ClusterConnectionString& fileConnectionString) override;
// Returns the specified path of the cluster file.
std::string getLocation() const override;
// Creates a copy of this object with a modified connection string but that isn't persisted.
Reference<IClusterConnectionRecord> makeIntermediateRecord(
ClusterConnectionString const& connectionString) const override;
// Returns a string representation of this cluster connection record. This will include the type of record and the
// filename of the cluster file.
std::string toString() const override;
void addref() override { ReferenceCounted<ClusterConnectionFile>::addref(); }
void delref() override { ReferenceCounted<ClusterConnectionFile>::delref(); }
// Returns <resolved name, was default file>. An empty 'filename' falls back to the CLUSTER_FILE
// environment variable, then ./fdb.cluster, then the platform default path.
static std::pair<std::string, bool> lookupClusterFileName(std::string const& filename);
// Gets a human readable error message describing the error returned from the constructor.
static std::string getErrorString(std::pair<std::string, bool> const& resolvedFile, Error const& e);
protected:
// Writes the connection string to the cluster file.
Future<bool> persist() override;
private:
// The connection string currently held in memory.
ClusterConnectionString cs;
// The path of the backing cluster file.
std::string filename;
};
#include "flow/unactorcompiler.h"
#endif

View File

@ -0,0 +1,172 @@
/*
* ClusterConnectionKey.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionKey.actor.h"
#include "flow/actorcompiler.h" // has to be last include
// Creates a cluster connection record with a given connection string to be saved to the specified key.
// needsToBePersisted should be left as True unless this ClusterConnectionKey is being created with the value
// already read from the key, in which case the stored and in-memory strings are known to match.
ClusterConnectionKey::ClusterConnectionKey(Database db,
Key connectionStringKey,
ClusterConnectionString const& contents,
ConnectionStringNeedsPersisted needsToBePersisted)
: IClusterConnectionRecord(needsToBePersisted), db(db), cs(contents), connectionStringKey(connectionStringKey) {
if (!needsToBePersisted) {
// The value came from the database, so record it as the last persisted string to detect external changes.
lastPersistedConnectionString = ValueRef(contents.toString());
}
}
// Loads and parses the connection string at the specified key, retrying transient transaction errors.
// Throws connection_string_invalid if the key is absent or its value cannot be parsed.
ACTOR Future<Reference<ClusterConnectionKey>> ClusterConnectionKey::loadClusterConnectionKey(Database db,
Key connectionStringKey) {
state Transaction tr(db);
loop {
try {
Optional<Value> v = wait(tr.get(connectionStringKey));
if (!v.present()) {
throw connection_string_invalid();
}
// Construct with NeedsPersisted::False because the value was just read from the database.
return makeReference<ClusterConnectionKey>(db,
connectionStringKey,
ClusterConnectionString(v.get().toString()),
ConnectionStringNeedsPersisted::False);
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
// Returns the connection string currently held in this object. This may not match the string in the database if it
// hasn't been persisted or if the key has been modified externally.
ClusterConnectionString const& ClusterConnectionKey::getConnectionString() const {
return cs;
}
// Sets the connection string held by this object and persists it to the database.
// Note: success() discards the bool result of persist(), so failures are reported only via trace events.
Future<Void> ClusterConnectionKey::setConnectionString(ClusterConnectionString const& connectionString) {
cs = connectionString;
return success(persist());
}
// Gets the connection string stored in the database by re-reading the key.
ACTOR Future<ClusterConnectionString> ClusterConnectionKey::getStoredConnectionStringImpl(
Reference<ClusterConnectionKey> self) {
Reference<ClusterConnectionKey> cck =
wait(ClusterConnectionKey::loadClusterConnectionKey(self->db, self->connectionStringKey));
return cck->cs;
}
Future<ClusterConnectionString> ClusterConnectionKey::getStoredConnectionString() {
// addRef keeps this object alive for the duration of the actor.
return getStoredConnectionStringImpl(Reference<ClusterConnectionKey>::addRef(this));
}
ACTOR Future<bool> ClusterConnectionKey::upToDateImpl(Reference<ClusterConnectionKey> self,
ClusterConnectionString* connectionString) {
try {
// The connection key hasn't been written yet, so there is nothing stored to compare against.
if (self->needsToBePersisted())
return true;
Reference<ClusterConnectionKey> temp =
wait(ClusterConnectionKey::loadClusterConnectionKey(self->db, self->connectionStringKey));
*connectionString = temp->getConnectionString();
return connectionString->toString() == self->cs.toString();
} catch (Error& e) {
TraceEvent(SevWarnAlways, "ClusterKeyError").error(e).detail("Key", self->connectionStringKey);
return false; // Swallow the error and report that the stored string is out of date
}
}
// Checks whether the connection string in the database matches the connection string stored in memory. The cluster
// string stored in the database is returned via the reference parameter connectionString.
Future<bool> ClusterConnectionKey::upToDate(ClusterConnectionString& connectionString) {
return upToDateImpl(Reference<ClusterConnectionKey>::addRef(this), &connectionString);
}
// Returns a printable representation of the key where the connection string is stored.
std::string ClusterConnectionKey::getLocation() const {
return printable(connectionStringKey);
}
// Creates a copy of this object with a modified connection string but that isn't persisted.
// The copy uses the default ConnectionStringNeedsPersisted::True, so it will be written if persisted later.
Reference<IClusterConnectionRecord> ClusterConnectionKey::makeIntermediateRecord(
ClusterConnectionString const& connectionString) const {
return makeReference<ClusterConnectionKey>(db, connectionStringKey, connectionString);
}
// Returns a string representation of this cluster connection record. This will include the type of record and the
// key where the record is stored.
std::string ClusterConnectionKey::toString() const {
return "fdbkey://" + printable(connectionStringKey);
}
// Writes the current connection string to the database key, retrying transient transaction errors.
// Returns true on success or if the key already holds the desired value; returns false (after tracing) if the
// stored value was changed externally to something unexpected or a non-retryable error occurs.
ACTOR Future<bool> ClusterConnectionKey::persistImpl(Reference<ClusterConnectionKey> self) {
self->setPersisted();
state Value newConnectionString = ValueRef(self->cs.toString());
try {
state Transaction tr(self->db);
loop {
try {
Optional<Value> existingConnectionString = wait(tr.get(self->connectionStringKey));
// Someone has already updated the connection string to what we want
if (existingConnectionString.present() && existingConnectionString.get() == newConnectionString) {
self->lastPersistedConnectionString = newConnectionString;
return true;
}
// Someone has updated the connection string to something we didn't expect, in which case we leave it
// alone. It's possible this could result in the stored string getting stuck if the connection string
// changes twice and only the first change is recorded. If the process that wrote the first change dies
// and no other process attempts to write the intermediate state, then only a newly opened connection
// key would be able to update the state.
else if (existingConnectionString.present() &&
existingConnectionString != self->lastPersistedConnectionString) {
TraceEvent(SevWarnAlways, "UnableToChangeConnectionKeyDueToMismatch")
.detail("ConnectionKey", self->connectionStringKey)
.detail("NewConnectionString", newConnectionString)
.detail("ExpectedStoredConnectionString", self->lastPersistedConnectionString)
.detail("ActualStoredConnectionString", existingConnectionString);
return false;
}
tr.set(self->connectionStringKey, newConnectionString);
wait(tr.commit());
self->lastPersistedConnectionString = newConnectionString;
return true;
} catch (Error& e) {
wait(tr.onError(e));
}
}
} catch (Error& e) {
TraceEvent(SevWarnAlways, "UnableToChangeConnectionKey")
.error(e)
.detail("ConnectionKey", self->connectionStringKey)
.detail("ConnectionString", self->cs.toString());
}
return false;
}
// Writes the connection string to the database.
Future<bool> ClusterConnectionKey::persist() {
// addRef keeps this object alive while the persist actor runs.
return persistImpl(Reference<ClusterConnectionKey>::addRef(this));
}

View File

@ -0,0 +1,97 @@
/*
* ClusterConnectionKey.actor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
// version.
#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_CLUSTERCONNECTIONKEY_ACTOR_G_H)
#define FDBCLIENT_CLUSTERCONNECTIONKEY_ACTOR_G_H
#include "fdbclient/ClusterConnectionKey.actor.g.h"
#elif !defined(FDBCLIENT_CLUSTERCONNECTIONKEY_ACTOR_H)
#define FDBCLIENT_CLUSTERCONNECTIONKEY_ACTOR_H
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/NativeAPI.actor.h"
#include "flow/actorcompiler.h" // has to be last include
// An implementation of IClusterConnectionRecord backed by a key in a FoundationDB database.
class ClusterConnectionKey : public IClusterConnectionRecord, ReferenceCounted<ClusterConnectionKey>, NonCopyable {
public:
// Creates a cluster connection record with a given connection string to be saved to the specified key.
// needsToBePersisted should be left as True unless this ClusterConnectionKey is being created with the value
// already read from the key.
ClusterConnectionKey(Database db,
Key connectionStringKey,
ClusterConnectionString const& contents,
ConnectionStringNeedsPersisted needsToBePersisted = ConnectionStringNeedsPersisted::True);
// Loads and parses the connection string at the specified key, throwing errors if the key cannot be read or the
// format is invalid.
ACTOR static Future<Reference<ClusterConnectionKey>> loadClusterConnectionKey(Database db, Key connectionStringKey);
// Returns the connection string currently held in this object. This may not match the string in the database if it
// hasn't been persisted or if the key has been modified externally.
ClusterConnectionString const& getConnectionString() const override;
// Sets the connection string held by this object and persists it.
Future<Void> setConnectionString(ClusterConnectionString const&) override;
// Gets the connection string stored in the database by re-reading the key.
Future<ClusterConnectionString> getStoredConnectionString() override;
// Checks whether the connection string in the database matches the connection string stored in memory. The cluster
// string stored in the database is returned via the reference parameter connectionString.
Future<bool> upToDate(ClusterConnectionString& connectionString) override;
// Returns the key where the connection string is stored.
std::string getLocation() const override;
// Creates a copy of this object with a modified connection string but that isn't persisted.
Reference<IClusterConnectionRecord> makeIntermediateRecord(
ClusterConnectionString const& connectionString) const override;
// Returns a string representation of this cluster connection record. This will include the type of record and the
// key where the record is stored.
std::string toString() const override;
void addref() override { ReferenceCounted<ClusterConnectionKey>::addref(); }
void delref() override { ReferenceCounted<ClusterConnectionKey>::delref(); }
protected:
// Writes the connection string to the database.
Future<bool> persist() override;
private:
ACTOR static Future<ClusterConnectionString> getStoredConnectionStringImpl(Reference<ClusterConnectionKey> self);
ACTOR static Future<bool> upToDateImpl(Reference<ClusterConnectionKey> self,
ClusterConnectionString* connectionString);
ACTOR static Future<bool> persistImpl(Reference<ClusterConnectionKey> self);
// The database where the connection key is stored. Note that this does not need to be the same database as the one
// that the connection string would connect to.
Database db;
// The connection string currently held in memory.
ClusterConnectionString cs;
// The key under which the connection string is stored.
Key connectionStringKey;
// The value most recently read from or written to the database, used to detect external modification.
Optional<Value> lastPersistedConnectionString;
};
#include "flow/unactorcompiler.h"
#endif

View File

@ -0,0 +1,68 @@
/*
* ClusterConnectionMemoryRecord.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/MonitorLeader.h"
#include "flow/actorcompiler.h" // has to be last include
// Returns the connection string currently held in this object.
ClusterConnectionString const& ClusterConnectionMemoryRecord::getConnectionString() const {
return cs;
}
// Sets the connection string held by this object. There is no persistent storage, so this completes immediately.
Future<Void> ClusterConnectionMemoryRecord::setConnectionString(ClusterConnectionString const& conn) {
cs = conn;
return Void();
}
// Returns the connection string currently held in this object (there is no persistent storage).
Future<ClusterConnectionString> ClusterConnectionMemoryRecord::getStoredConnectionString() {
return cs;
}
// Because the memory record is not persisted, it is always up to date and this returns true. The connection string
// is returned via the reference parameter fileConnectionString.
Future<bool> ClusterConnectionMemoryRecord::upToDate(ClusterConnectionString& fileConnectionString) {
fileConnectionString = cs;
return true;
}
// Returns the ID of the memory record, which serves as its location string.
std::string ClusterConnectionMemoryRecord::getLocation() const {
return id.toString();
}
// Returns a copy of this object with a modified connection string. The copy gets its own new random ID.
Reference<IClusterConnectionRecord> ClusterConnectionMemoryRecord::makeIntermediateRecord(
ClusterConnectionString const& connectionString) const {
return makeReference<ClusterConnectionMemoryRecord>(connectionString);
}
// Returns a string representation of this cluster connection record. This will include the type and id of the
// record.
std::string ClusterConnectionMemoryRecord::toString() const {
return "memory://" + id.toString();
}
// This is a no-op for memory records. Returns true to indicate success.
Future<bool> ClusterConnectionMemoryRecord::persist() {
return true;
}

View File

@ -0,0 +1,74 @@
/*
* ClusterConnectionMemoryRecord.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef FDBCLIENT_CLUSTERCONNECTIONMEMORYRECORD_H
#define FDBCLIENT_CLUSTERCONNECTIONMEMORYRECORD_H
#include "fdbclient/CoordinationInterface.h"
// An implementation of IClusterConnectionRecord that is stored in memory only and not persisted.
class ClusterConnectionMemoryRecord : public IClusterConnectionRecord,
ReferenceCounted<ClusterConnectionMemoryRecord>,
NonCopyable {
public:
// Creates a memory-only record with a given connection string and a fresh random ID.
explicit ClusterConnectionMemoryRecord(ClusterConnectionString const& cs)
: IClusterConnectionRecord(ConnectionStringNeedsPersisted::False), id(deterministicRandom()->randomUniqueID()),
cs(cs) {}
// Returns the connection string currently held in this object.
ClusterConnectionString const& getConnectionString() const override;
// Sets the connection string held by this object.
Future<Void> setConnectionString(ClusterConnectionString const&) override;
// Returns the connection string currently held in this object (there is no persistent storage).
Future<ClusterConnectionString> getStoredConnectionString() override;
// Because the memory record is not persisted, it is always up to date and this returns true. The connection string
// is returned via the reference parameter fileConnectionString.
Future<bool> upToDate(ClusterConnectionString& fileConnectionString) override;
// Returns a location string for the memory record that includes its ID.
std::string getLocation() const override;
// Returns a copy of this object with a modified connection string.
Reference<IClusterConnectionRecord> makeIntermediateRecord(
ClusterConnectionString const& connectionString) const override;
// Returns a string representation of this cluster connection record. This will include the type and id of the
// record.
std::string toString() const override;
void addref() override { ReferenceCounted<ClusterConnectionMemoryRecord>::addref(); }
void delref() override { ReferenceCounted<ClusterConnectionMemoryRecord>::delref(); }
protected:
// This is a no-op for memory records. Returns true to indicate success.
Future<bool> persist() override;
private:
// A unique ID for the record, used as its location string.
UID id;
// The connection string held in memory.
ClusterConnectionString cs;
};
#endif

View File

@ -36,6 +36,8 @@ struct ClusterInterface {
RequestStream<ReplyPromise<Void>> ping;
RequestStream<struct GetClientWorkersRequest> getClientWorkers;
RequestStream<struct ForceRecoveryRequest> forceRecovery;
RequestStream<struct MoveShardRequest> moveShard;
RequestStream<struct RepairSystemDataRequest> repairSystemData;
bool operator==(ClusterInterface const& r) const { return id() == r.id(); }
bool operator!=(ClusterInterface const& r) const { return id() != r.id(); }
@ -45,7 +47,8 @@ struct ClusterInterface {
bool hasMessage() const {
return openDatabase.getFuture().isReady() || failureMonitoring.getFuture().isReady() ||
databaseStatus.getFuture().isReady() || ping.getFuture().isReady() ||
getClientWorkers.getFuture().isReady() || forceRecovery.getFuture().isReady();
getClientWorkers.getFuture().isReady() || forceRecovery.getFuture().isReady() ||
moveShard.getFuture().isReady() || repairSystemData.getFuture().isReady();
}
void initEndpoints() {
@ -55,11 +58,21 @@ struct ClusterInterface {
ping.getEndpoint(TaskPriority::ClusterController);
getClientWorkers.getEndpoint(TaskPriority::ClusterController);
forceRecovery.getEndpoint(TaskPriority::ClusterController);
moveShard.getEndpoint(TaskPriority::ClusterController);
repairSystemData.getEndpoint(TaskPriority::ClusterController);
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, openDatabase, failureMonitoring, databaseStatus, ping, getClientWorkers, forceRecovery);
serializer(ar,
openDatabase,
failureMonitoring,
databaseStatus,
ping,
getClientWorkers,
forceRecovery,
moveShard,
repairSystemData);
}
};
@ -291,4 +304,37 @@ struct ForceRecoveryRequest {
}
};
// Request to move a keyrange (shard) to a new team represented as addresses.
struct MoveShardRequest {
constexpr static FileIdentifier file_identifier = 2799592;
// The keyrange (shard) to be relocated.
KeyRange shard;
// Addresses of the team that should host the shard after the move.
std::vector<NetworkAddress> addresses;
// Completed when the cluster has processed the move request.
ReplyPromise<Void> reply;
MoveShardRequest() {}
// Takes the range and target addresses by value and moves them into the members.
MoveShardRequest(KeyRange shard, std::vector<NetworkAddress> addresses)
: shard{ std::move(shard) }, addresses{ std::move(addresses) } {}
template <class Ar>
void serialize(Ar& ar) {
// NOTE: field order here defines the wire format; do not reorder.
serializer(ar, shard, addresses, reply);
}
};
// Request to trigger a master recovery, and during the following recovery, the system metadata will be
// reconstructed from TLogs, and written to a new SS team.
// This is used when metadata on SSes are lost or corrupted.
struct RepairSystemDataRequest {
constexpr static FileIdentifier file_identifier = 2799593;
// Completed when the repair request has been accepted/processed.
ReplyPromise<Void> reply;
RepairSystemDataRequest() {}
template <class Ar>
void serialize(Ar& ar) {
// Only the reply promise travels on the wire; the request carries no payload.
serializer(ar, reply);
}
};
#endif

View File

@ -45,11 +45,23 @@ struct ClientLeaderRegInterface {
}
};
// A string containing the information necessary to connect to a cluster.
//
// The format of the connection string is: description:id@[addrs]+
// The description and id together are called the "key"
//
// The following is enforced about the format of the file:
// - The key must contain one (and only one) ':' character
// - The description contains only allowed characters (a-z, A-Z, 0-9, _)
// - The ID contains only allowed characters (a-z, A-Z, 0-9)
// - At least one address is specified
// - There is no address present more than once
class ClusterConnectionString {
public:
ClusterConnectionString() {}
ClusterConnectionString(std::string const& connectionString);
ClusterConnectionString(std::vector<NetworkAddress>, Key);
std::vector<NetworkAddress> const& coordinators() const { return coord; }
Key clusterKey() const { return key; }
Key clusterKeyName() const {
@ -65,45 +77,70 @@ private:
Key key, keyDesc;
};
class ClusterConnectionFile : NonCopyable, public ReferenceCounted<ClusterConnectionFile> {
FDB_DECLARE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted);
// A record that stores the connection string used to connect to a cluster. This record can be updated when a cluster
// notifies a connected party that the connection string has changed.
//
// The typically used cluster connection record is a cluster file (implemented in ClusterConnectionFile). This interface
// provides an abstraction over the cluster file so that we can persist the connection string in other locations or have
// one that is only stored in memory.
class IClusterConnectionRecord {
public:
ClusterConnectionFile() {}
// Loads and parses the file at 'path', throwing errors if the file cannot be read or the format is invalid.
//
// The format of the file is: description:id@[addrs]+
// The description and id together are called the "key"
//
// The following is enforced about the format of the file:
// - The key must contain one (and only one) ':' character
// - The description contains only allowed characters (a-z, A-Z, 0-9, _)
// - The ID contains only allowed characters (a-z, A-Z, 0-9)
// - At least one address is specified
// - There is no address present more than once
explicit ClusterConnectionFile(std::string const& path);
explicit ClusterConnectionFile(ClusterConnectionString const& cs) : cs(cs), setConn(false) {}
explicit ClusterConnectionFile(std::string const& filename, ClusterConnectionString const& contents);
IClusterConnectionRecord(ConnectionStringNeedsPersisted connectionStringNeedsPersisted)
: connectionStringNeedsPersisted(connectionStringNeedsPersisted) {}
virtual ~IClusterConnectionRecord() {}
// returns <resolved name, was default file>
static std::pair<std::string, bool> lookupClusterFileName(std::string const& filename);
// get a human readable error message describing the error returned from the constructor
static std::string getErrorString(std::pair<std::string, bool> const& resolvedFile, Error const& e);
// Returns the connection string currently held in this object. This may not match the stored record if it hasn't
// been persisted or if the persistent storage for the record has been modified externally.
virtual ClusterConnectionString const& getConnectionString() const = 0;
ClusterConnectionString const& getConnectionString() const;
bool writeFile();
void setConnectionString(ClusterConnectionString const&);
std::string const& getFilename() const {
ASSERT(filename.size());
return filename;
}
bool canGetFilename() const { return filename.size() != 0; }
bool fileContentsUpToDate() const;
bool fileContentsUpToDate(ClusterConnectionString& fileConnectionString) const;
// Sets the connections string held by this object and persists it.
virtual Future<Void> setConnectionString(ClusterConnectionString const&) = 0;
// If this record is backed by persistent storage, get the connection string from that storage. Otherwise, return
// the connection string stored in memory.
virtual Future<ClusterConnectionString> getStoredConnectionString() = 0;
// Checks whether the connection string in persistent storage matches the connection string stored in memory.
Future<bool> upToDate();
// Checks whether the connection string in persistent storage matches the connection string stored in memory. The
// cluster string stored in persistent storage is returned via the reference parameter connectionString.
virtual Future<bool> upToDate(ClusterConnectionString& connectionString) = 0;
// Returns a string representing the location of the cluster record. For example, this could be the filename or key
// that stores the connection string.
virtual std::string getLocation() const = 0;
// Creates a copy of this object with a modified connection string but that isn't persisted.
virtual Reference<IClusterConnectionRecord> makeIntermediateRecord(
ClusterConnectionString const& connectionString) const = 0;
// Returns a string representation of this cluster connection record. This will include the type and location of the
// record.
virtual std::string toString() const = 0;
// Signals to the connection record that it was successfully used to connect to a cluster.
void notifyConnected();
virtual void addref() = 0;
virtual void delref() = 0;
protected:
// Writes the connection string to the backing persistent storage, if applicable.
virtual Future<bool> persist() = 0;
// Returns whether the connection record contains a connection string that needs to be persisted upon connection.
bool needsToBePersisted() const;
// Clears the needs-persisted flag.
void setPersisted();
private:
ClusterConnectionString cs;
std::string filename;
bool setConn;
// A flag that indicates whether this connection record needs to be persisted when it successfully establishes a
// connection.
bool connectionStringNeedsPersisted;
};
struct LeaderInfo {
@ -199,9 +236,9 @@ class ClientCoordinators {
public:
std::vector<ClientLeaderRegInterface> clientLeaderServers;
Key clusterKey;
Reference<ClusterConnectionFile> ccf;
Reference<IClusterConnectionRecord> ccr;
explicit ClientCoordinators(Reference<ClusterConnectionFile> ccf);
explicit ClientCoordinators(Reference<IClusterConnectionRecord> ccr);
explicit ClientCoordinators(Key clusterKey, std::vector<NetworkAddress> coordinators);
ClientCoordinators() {}
};

View File

@ -167,7 +167,7 @@ public:
// Constructs a new copy of this DatabaseContext from the parameters of this DatabaseContext
Database clone() const {
return Database(new DatabaseContext(connectionFile,
return Database(new DatabaseContext(connectionRecord,
clientInfo,
coordinator,
clientInfoMonitor,
@ -231,16 +231,16 @@ public:
Future<Void> onConnected(); // Returns after a majority of coordination servers are available and have reported a
// leader. The cluster file therefore is valid, but the database might be unavailable.
Reference<ClusterConnectionFile> getConnectionFile();
Reference<IClusterConnectionRecord> getConnectionRecord();
// Switch the database to use the new connection file, and recreate all pending watches for committed transactions.
//
// Meant to be used as part of a 'hot standby' solution to switch to the standby. A correct switch will involve
// advancing the version on the new cluster sufficiently far that any transaction begun with a read version from the
// old cluster will fail to commit. Assuming the above version-advancing is done properly, a call to
// switchConnectionFile guarantees that any read with a version from the old cluster will not be attempted on the
// switchConnectionRecord guarantees that any read with a version from the old cluster will not be attempted on the
// new cluster.
Future<Void> switchConnectionFile(Reference<ClusterConnectionFile> standby);
Future<Void> switchConnectionRecord(Reference<IClusterConnectionRecord> standby);
Future<Void> connectionFileChanged();
IsSwitchable switchable{ false };
@ -253,7 +253,7 @@ public:
Future<Void> createSnapshot(StringRef uid, StringRef snapshot_command);
// private:
explicit DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile,
explicit DatabaseContext(Reference<AsyncVar<Reference<IClusterConnectionRecord>>> connectionRecord,
Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
Future<Void> clientInfoMonitor,
@ -270,7 +270,7 @@ public:
void expireThrottles();
// Key DB-specific information
Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile;
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> connectionRecord;
AsyncTrigger proxiesChangeTrigger;
Future<Void> monitorProxiesInfoChange;
Future<Void> monitorTssInfoChange;
@ -387,6 +387,7 @@ public:
int snapshotRywEnabled;
int transactionTracingEnabled;
double verifyCausalReadsProp = 0.0;
Future<Void> logger;
Future<Void> throttleExpirer;

View File

@ -73,13 +73,14 @@ class SampleSender : public std::enable_shared_from_this<SampleSender<Protocol,
}
void send(boost::asio::ip::tcp::socket& socket, std::shared_ptr<Buf> const& buf) {
boost::asio::async_write(socket,
boost::asio::const_buffer(buf->data, buf->size),
[buf, this](auto const& ec, size_t) { this->sendCompletionHandler(ec); });
boost::system::error_code ec;
socket.send(boost::asio::const_buffer(buf->data, buf->size), 0, ec);
this->sendCompletionHandler(ec);
}
void send(boost::asio::ip::udp::socket& socket, std::shared_ptr<Buf> const& buf) {
socket.async_send(boost::asio::const_buffer(buf->data, buf->size),
[buf, this](auto const& ec, size_t) { this->sendCompletionHandler(ec); });
boost::system::error_code ec;
socket.send(boost::asio::const_buffer(buf->data, buf->size), 0, ec);
this->sendCompletionHandler(ec);
}
void sendNext() {

View File

@ -22,6 +22,9 @@
#ifndef FDBCLIENT_GRVPROXYINTERFACE_H
#define FDBCLIENT_GRVPROXYINTERFACE_H
#pragma once
#include "flow/FileIdentifier.h"
#include "fdbrpc/fdbrpc.h"
#include "fdbclient/FDBTypes.h"
// GrvProxy is proxy primarily specializing on serving GetReadVersion. It also serves health metrics since it
// communicates with RateKeeper to gather health information of the cluster.

View File

@ -24,6 +24,7 @@
#include "fdbclient/Knobs.h"
#include "flow/Arena.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/ReadYourWrites.h"
@ -778,15 +779,18 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?
state ClusterConnectionString old(currentKey.get().toString());
if (tr->getDatabase()->getConnectionFile() &&
if (tr->getDatabase()->getConnectionRecord() &&
old.clusterKeyName().toString() !=
tr->getDatabase()->getConnectionFile()->getConnectionString().clusterKeyName())
tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!desiredCoordinators->size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
tr, old.coordinators(), Reference<ClusterConnectionFile>(new ClusterConnectionFile(old)), result));
tr,
old.coordinators(),
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
*desiredCoordinators = _desiredCoordinators;
}
@ -821,7 +825,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
}
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(conn)));
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
@ -854,14 +858,17 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?
state ClusterConnectionString old(currentKey.get().toString());
if (cx->getConnectionFile() &&
old.clusterKeyName().toString() != cx->getConnectionFile()->getConnectionString().clusterKeyName())
if (cx->getConnectionRecord() &&
old.clusterKeyName().toString() != cx->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
&tr, old.coordinators(), Reference<ClusterConnectionFile>(new ClusterConnectionFile(old)), result));
&tr,
old.coordinators(),
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
desiredCoordinators = _desiredCoordinators;
}
@ -907,7 +914,8 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
TEST(old.clusterKeyName() == conn.clusterKeyName()); // Quorum change with unchanged name
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
state ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
state ClientCoordinators coord(
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(conn)));
// check if allowed to modify the cluster descriptor
if (!change->getDesiredClusterKeyName().empty()) {
CheckDescriptorMutableReply mutabilityReply =
@ -942,7 +950,7 @@ struct SpecifiedQuorumChange final : IQuorumChange {
explicit SpecifiedQuorumChange(std::vector<NetworkAddress> const& desired) : desired(desired) {}
Future<std::vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
std::vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
Reference<IClusterConnectionRecord>,
CoordinatorsResult&) override {
return desired;
}
@ -954,7 +962,7 @@ Reference<IQuorumChange> specifiedQuorumChange(std::vector<NetworkAddress> const
struct NoQuorumChange final : IQuorumChange {
Future<std::vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
std::vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
Reference<IClusterConnectionRecord>,
CoordinatorsResult&) override {
return oldCoordinators;
}
@ -970,9 +978,9 @@ struct NameQuorumChange final : IQuorumChange {
: newName(newName), otherChange(otherChange) {}
Future<std::vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
std::vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> cf,
Reference<IClusterConnectionRecord> ccr,
CoordinatorsResult& t) override {
return otherChange->getDesiredCoordinators(tr, oldCoordinators, cf, t);
return otherChange->getDesiredCoordinators(tr, oldCoordinators, ccr, t);
}
std::string getDesiredClusterKeyName() const override { return newName; }
};
@ -986,9 +994,9 @@ struct AutoQuorumChange final : IQuorumChange {
Future<std::vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
std::vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> ccf,
Reference<IClusterConnectionRecord> ccr,
CoordinatorsResult& err) override {
return getDesired(Reference<AutoQuorumChange>::addRef(this), tr, oldCoordinators, ccf, &err);
return getDesired(Reference<AutoQuorumChange>::addRef(this), tr, oldCoordinators, ccr, &err);
}
ACTOR static Future<int> getRedundancy(AutoQuorumChange* self, Transaction* tr) {
@ -1006,7 +1014,7 @@ struct AutoQuorumChange final : IQuorumChange {
ACTOR static Future<bool> isAcceptable(AutoQuorumChange* self,
Transaction* tr,
std::vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> ccf,
Reference<IClusterConnectionRecord> ccr,
int desiredCount,
std::set<AddressExclusion>* excluded) {
// Are there enough coordinators for the redundancy level?
@ -1016,7 +1024,7 @@ struct AutoQuorumChange final : IQuorumChange {
return false;
// Check availability
ClientCoordinators coord(ccf);
ClientCoordinators coord(ccr);
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
@ -1054,7 +1062,7 @@ struct AutoQuorumChange final : IQuorumChange {
ACTOR static Future<std::vector<NetworkAddress>> getDesired(Reference<AutoQuorumChange> self,
Transaction* tr,
std::vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> ccf,
Reference<IClusterConnectionRecord> ccr,
CoordinatorsResult* err) {
state int desiredCount = self->desired;
@ -1088,7 +1096,7 @@ struct AutoQuorumChange final : IQuorumChange {
}
if (checkAcceptable) {
bool ok = wait(isAcceptable(self.getPtr(), tr, oldCoordinators, ccf, desiredCount, &excluded));
bool ok = wait(isAcceptable(self.getPtr(), tr, oldCoordinators, ccr, desiredCount, &excluded));
if (ok)
return oldCoordinators;
}
@ -2093,7 +2101,7 @@ ACTOR Future<Void> advanceVersion(Database cx, Version v) {
}
}
ACTOR Future<Void> forceRecovery(Reference<ClusterConnectionFile> clusterFile, Key dcId) {
ACTOR Future<Void> forceRecovery(Reference<IClusterConnectionRecord> clusterFile, Key dcId) {
state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);
state Future<Void> leaderMon = monitorLeader<ClusterInterface>(clusterFile, clusterInterface);

View File

@ -130,7 +130,7 @@ struct IQuorumChange : ReferenceCounted<IQuorumChange> {
virtual ~IQuorumChange() {}
virtual Future<std::vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
std::vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
Reference<IClusterConnectionRecord>,
CoordinatorsResult&) = 0;
virtual std::string getDesiredClusterKeyName() const { return std::string(); }
};
@ -211,7 +211,7 @@ ACTOR Future<Void> advanceVersion(Database cx, Version v);
ACTOR Future<int> setDDMode(Database cx, int mode);
ACTOR Future<Void> forceRecovery(Reference<ClusterConnectionFile> clusterFile, Standalone<StringRef> dcId);
ACTOR Future<Void> forceRecovery(Reference<IClusterConnectionRecord> clusterFile, Standalone<StringRef> dcId);
ACTOR Future<Void> printHealthyZone(Database cx);
ACTOR Future<Void> setDDIgnoreRebalanceSwitch(Database cx, bool ignoreRebalance);

View File

@ -18,8 +18,10 @@
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/MonitorLeader.h"
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/NativeAPI.actor.h"
#include "flow/ActorCollection.h"
#include "flow/UnitTest.h"
#include "fdbrpc/genericactors.actor.h"
@ -48,124 +50,25 @@ std::string trim(std::string const& connectionString) {
} // namespace
std::pair<std::string, bool> ClusterConnectionFile::lookupClusterFileName(std::string const& filename) {
if (filename.length())
return std::make_pair(filename, false);
FDB_DEFINE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted);
std::string f;
bool isDefaultFile = true;
if (platform::getEnvironmentVar(CLUSTER_FILE_ENV_VAR_NAME, f)) {
// If this is set but points to a file that does not
// exist, we will not fallback to any other methods
isDefaultFile = false;
} else if (fileExists("fdb.cluster"))
f = "fdb.cluster";
else
f = platform::getDefaultClusterFilePath();
return std::make_pair(f, isDefaultFile);
}
std::string ClusterConnectionFile::getErrorString(std::pair<std::string, bool> const& resolvedClusterFile,
Error const& e) {
bool isDefault = resolvedClusterFile.second;
if (e.code() == error_code_connection_string_invalid) {
return format("Invalid cluster file `%s': %d %s", resolvedClusterFile.first.c_str(), e.code(), e.what());
} else if (e.code() == error_code_no_cluster_file_found) {
if (isDefault)
return format("Unable to read cluster file `./fdb.cluster' or `%s' and %s unset: %d %s",
platform::getDefaultClusterFilePath().c_str(),
CLUSTER_FILE_ENV_VAR_NAME,
e.code(),
e.what());
else
return format(
"Unable to read cluster file `%s': %d %s", resolvedClusterFile.first.c_str(), e.code(), e.what());
} else {
return format(
"Unexpected error loading cluster file `%s': %d %s", resolvedClusterFile.first.c_str(), e.code(), e.what());
}
}
ClusterConnectionFile::ClusterConnectionFile(std::string const& filename) {
if (!fileExists(filename)) {
throw no_cluster_file_found();
}
cs = ClusterConnectionString(readFileBytes(filename, MAX_CLUSTER_FILE_BYTES));
this->filename = filename;
setConn = false;
}
ClusterConnectionFile::ClusterConnectionFile(std::string const& filename, ClusterConnectionString const& contents) {
this->filename = filename;
cs = contents;
setConn = true;
}
ClusterConnectionString const& ClusterConnectionFile::getConnectionString() const {
return cs;
}
void ClusterConnectionFile::notifyConnected() {
if (setConn) {
this->writeFile();
}
}
bool ClusterConnectionFile::fileContentsUpToDate() const {
Future<bool> IClusterConnectionRecord::upToDate() {
ClusterConnectionString temp;
return fileContentsUpToDate(temp);
return upToDate(temp);
}
bool ClusterConnectionFile::fileContentsUpToDate(ClusterConnectionString& fileConnectionString) const {
try {
// the cluster file hasn't been created yet so there's nothing to check
if (setConn)
return true;
ClusterConnectionFile temp(filename);
fileConnectionString = temp.getConnectionString();
return fileConnectionString.toString() == cs.toString();
} catch (Error& e) {
TraceEvent(SevWarnAlways, "ClusterFileError").error(e).detail("Filename", filename);
return false; // Swallow the error and report that the file is out of date
void IClusterConnectionRecord::notifyConnected() {
if (connectionStringNeedsPersisted) {
this->persist();
}
}
bool ClusterConnectionFile::writeFile() {
setConn = false;
if (filename.size()) {
try {
atomicReplace(filename,
"# DO NOT EDIT!\n# This file is auto-generated, it is not to be edited by hand\n" +
cs.toString().append("\n"));
if (!fileContentsUpToDate()) {
// This should only happen in rare scenarios where multiple processes are updating the same file to
// different values simultaneously In that case, we don't have any guarantees about which file will
// ultimately be written
TraceEvent(SevWarnAlways, "ClusterFileChangedAfterReplace")
.detail("Filename", filename)
.detail("ConnStr", cs.toString());
return false;
}
return true;
} catch (Error& e) {
TraceEvent(SevWarnAlways, "UnableToChangeConnectionFile")
.error(e)
.detail("Filename", filename)
.detail("ConnStr", cs.toString());
}
}
return false;
// Returns true while the in-memory connection string has not yet been written
// to the record's backing persistent storage (if any).
bool IClusterConnectionRecord::needsToBePersisted() const {
return connectionStringNeedsPersisted;
}
void ClusterConnectionFile::setConnectionString(ClusterConnectionString const& conn) {
ASSERT(filename.size());
cs = conn;
writeFile();
// Clears the needs-persisted flag, marking the connection string as already
// written to the record's backing storage.
void IClusterConnectionRecord::setPersisted() {
connectionStringNeedsPersisted = false;
}
std::string ClusterConnectionString::getErrorString(std::string const& source, Error const& e) {
@ -367,8 +270,8 @@ std::string ClusterConnectionString::toString() const {
return s;
}
ClientCoordinators::ClientCoordinators(Reference<ClusterConnectionFile> ccf) : ccf(ccf) {
ClusterConnectionString cs = ccf->getConnectionString();
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
clusterKey = cs.clusterKey();
@ -379,7 +282,7 @@ ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddres
for (const auto& coord : coordinators) {
clientLeaderServers.push_back(ClientLeaderRegInterface(coord));
}
ccf = makeReference<ClusterConnectionFile>(ClusterConnectionString(coordinators, clusterKey));
ccr = makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(coordinators, clusterKey));
}
ClientLeaderRegInterface::ClientLeaderRegInterface(NetworkAddress remote)
@ -476,10 +379,10 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
}
// Leader is the process that will be elected by coordinators as the cluster controller
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<ClusterConnectionFile> connFile,
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
MonitorLeaderInfo info) {
state ClientCoordinators coordinators(info.intermediateConnFile);
state ClientCoordinators coordinators(info.intermediateConnRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state Future<Void> allActors;
@ -502,25 +405,26 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<ClusterConn
if (leader.get().first.forward) {
TraceEvent("MonitorLeaderForwarding")
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
.detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString())
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
.trackLatest("MonitorLeaderForwarding");
info.intermediateConnFile = makeReference<ClusterConnectionFile>(
connFile->getFilename(), ClusterConnectionString(leader.get().first.serializedInfo.toString()));
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
return info;
}
if (connFile != info.intermediateConnFile) {
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("Filename", connFile->getFilename())
.detail("ConnectionStringFromFile", connFile->getConnectionString().toString())
.detail("CurrentConnectionString", info.intermediateConnFile->getConnectionString().toString());
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connFile->setConnectionString(info.intermediateConnFile->getConnectionString());
info.intermediateConnFile = connFile;
connRecord->setConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
info.hasConnected = true;
connFile->notifyConnected();
connRecord->notifyConnected();
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
}
@ -528,11 +432,11 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<ClusterConn
}
}
ACTOR Future<Void> monitorLeaderInternal(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> monitorLeaderInternal(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
state MonitorLeaderInfo info(connFile);
state MonitorLeaderInfo info(connRecord);
loop {
MonitorLeaderInfo _info = wait(monitorLeaderOneGeneration(connFile, outSerializedLeaderInfo, info));
MonitorLeaderInfo _info = wait(monitorLeaderOneGeneration(connRecord, outSerializedLeaderInfo, info));
info = _info;
}
}
@ -750,13 +654,13 @@ void shrinkProxyList(ClientDBInfo& ni,
}
ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
Reference<ClusterConnectionFile> connFile,
Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<ClientDBInfo>> clientInfo,
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
MonitorLeaderInfo info,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) {
state ClusterConnectionString cs = info.intermediateConnFile->getConnectionString();
state ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = cs.coordinators();
state int idx = 0;
state int successIndex = 0;
@ -779,20 +683,24 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
req.supportedVersions = supportedVersions->get();
req.traceLogGroup = traceLogGroup;
ClusterConnectionString fileConnectionString;
if (connFile && !connFile->fileContentsUpToDate(fileConnectionString)) {
req.issues.push_back_deep(req.issues.arena(), LiteralStringRef("incorrect_cluster_file_contents"));
std::string connectionString = connFile->getConnectionString().toString();
if (!incorrectTime.present()) {
incorrectTime = now();
}
if (connFile->canGetFilename()) {
// Don't log a SevWarnAlways initially to account for transient issues (e.g. someone else changing the
// file right before us)
state ClusterConnectionString storedConnectionString;
if (connRecord) {
bool upToDate = wait(connRecord->upToDate(storedConnectionString));
if (!upToDate) {
req.issues.push_back_deep(req.issues.arena(), LiteralStringRef("incorrect_cluster_file_contents"));
std::string connectionString = connRecord->getConnectionString().toString();
if (!incorrectTime.present()) {
incorrectTime = now();
}
// Don't log a SevWarnAlways initially to account for transient issues (e.g. someone else changing
// the file right before us)
TraceEvent(now() - incorrectTime.get() > 300 ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents")
.detail("Filename", connFile->getFilename())
.detail("ConnectionStringFromFile", fileConnectionString.toString())
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", storedConnectionString.toString())
.detail("CurrentConnectionString", connectionString);
} else {
incorrectTime = Optional<double>();
}
} else {
incorrectTime = Optional<double>();
@ -804,24 +712,25 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
if (rep.get().read().forward.present()) {
TraceEvent("MonitorProxiesForwarding")
.detail("NewConnStr", rep.get().read().forward.get().toString())
.detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString());
info.intermediateConnFile = Reference<ClusterConnectionFile>(new ClusterConnectionFile(
connFile->getFilename(), ClusterConnectionString(rep.get().read().forward.get().toString())));
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString());
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(rep.get().read().forward.get().toString()));
return info;
}
if (connFile != info.intermediateConnFile) {
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("Filename", connFile->getFilename())
.detail("ConnectionStringFromFile", connFile->getConnectionString().toString())
.detail("CurrentConnectionString", info.intermediateConnFile->getConnectionString().toString());
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connFile->setConnectionString(info.intermediateConnFile->getConnectionString());
info.intermediateConnFile = connFile;
connRecord->setConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
info.hasConnected = true;
connFile->notifyConnected();
connRecord->notifyConnected();
auto& ni = rep.get().mutate();
shrinkProxyList(ni, lastCommitProxyUIDs, lastCommitProxies, lastGrvProxyUIDs, lastGrvProxies);
@ -838,21 +747,21 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
}
ACTOR Future<Void> monitorProxies(
Reference<AsyncVar<Reference<ClusterConnectionFile>>> connFile,
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> connRecord,
Reference<AsyncVar<ClientDBInfo>> clientInfo,
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) {
state MonitorLeaderInfo info(connFile->get());
state MonitorLeaderInfo info(connRecord->get());
loop {
choose {
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
connFile->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
info = _info;
}
when(wait(connFile->onChange())) {
when(wait(connRecord->onChange())) {
info.hasConnected = false;
info.intermediateConnFile = connFile->get();
info.intermediateConnRecord = connRecord->get();
}
}
}

View File

@ -54,11 +54,11 @@ struct ClientData {
struct MonitorLeaderInfo {
bool hasConnected;
Reference<ClusterConnectionFile> intermediateConnFile;
Reference<IClusterConnectionRecord> intermediateConnRecord;
MonitorLeaderInfo() : hasConnected(false) {}
explicit MonitorLeaderInfo(Reference<ClusterConnectionFile> intermediateConnFile)
: hasConnected(false), intermediateConnFile(intermediateConnFile) {}
explicit MonitorLeaderInfo(Reference<IClusterConnectionRecord> intermediateConnRecord)
: hasConnected(false), intermediateConnRecord(intermediateConnRecord) {}
};
Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<LeaderInfo>>& nominees);
@ -68,7 +68,7 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
// If a leader is elected for long enough and communication with a quorum of coordinators is possible, eventually
// outKnownLeader will be that leader's interface.
template <class LeaderInterface>
Future<Void> monitorLeader(Reference<ClusterConnectionFile> const& connFile,
Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connFile,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader);
// This is one place where the leader election algorithm is run. The coodinator contacts all coodinators to collect
@ -80,7 +80,7 @@ Future<Void> monitorLeaderAndGetClientInfo(Value const& key,
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo);
Future<Void> monitorProxies(
Reference<AsyncVar<Reference<ClusterConnectionFile>>> const& connFile,
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,
Reference<AsyncVar<ClientDBInfo>> const& clientInfo,
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> const& coordinator,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> const& supportedVersions,
@ -96,7 +96,7 @@ void shrinkProxyList(ClientDBInfo& ni,
#pragma region Implementation
#endif
Future<Void> monitorLeaderInternal(Reference<ClusterConnectionFile> const& connFile,
Future<Void> monitorLeaderInternal(Reference<IClusterConnectionRecord> const& connRecord,
Reference<AsyncVar<Value>> const& outSerializedLeaderInfo);
template <class LeaderInterface>
@ -119,11 +119,11 @@ struct LeaderDeserializer<ClusterInterface> {
};
template <class LeaderInterface>
Future<Void> monitorLeader(Reference<ClusterConnectionFile> const& connFile,
Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connRecord,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader) {
LeaderDeserializer<LeaderInterface> deserializer;
auto serializedInfo = makeReference<AsyncVar<Value>>();
Future<Void> m = monitorLeaderInternal(connFile, serializedInfo);
Future<Void> m = monitorLeaderInternal(connRecord, serializedInfo);
return m || deserializer(serializedInfo, outKnownLeader);
}

View File

@ -538,6 +538,8 @@ void DLApi::runNetwork() {
hook.first(hook.second);
} catch (Error& e) {
TraceEvent(SevError, "NetworkShutdownHookError").error(e);
} catch (std::exception& e) {
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error()).detail("RootException", e.what());
} catch (...) {
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error());
}
@ -1813,9 +1815,14 @@ THREAD_FUNC_RETURN runNetworkThread(void* param) {
try {
((ClientInfo*)param)->api->runNetwork();
} catch (Error& e) {
TraceEvent(SevError, "RunNetworkError").error(e);
TraceEvent(SevError, "ExternalRunNetworkError").error(e);
} catch (std::exception& e) {
TraceEvent(SevError, "ExternalRunNetworkError").error(unknown_error()).detail("RootException", e.what());
} catch (...) {
TraceEvent(SevError, "ExternalRunNetworkError").error(unknown_error());
}
TraceEvent("ExternalNetworkThreadTerminating");
THREAD_RETURN;
}
@ -1852,6 +1859,7 @@ void MultiVersionApi::stopNetwork() {
}
lock.leave();
TraceEvent("MultiVersionStopNetwork");
localClient->api->stopNetwork();
if (!bypassMultiClientApi) {

View File

@ -36,6 +36,7 @@
#include "fdbclient/AnnotateActor.h"
#include "fdbclient/Atomic.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/GlobalConfig.actor.h"
@ -376,8 +377,9 @@ ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
ev.detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged)
.detail("Cluster",
cx->getConnectionFile() ? cx->getConnectionFile()->getConnectionString().clusterKeyName().toString()
: "")
cx->getConnectionRecord()
? cx->getConnectionRecord()->getConnectionString().clusterKeyName().toString()
: "")
.detail("Internal", cx->internal);
cx->cc.logToTraceEvent(ev);
@ -402,7 +404,8 @@ ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
.detail("MaxMutationsPerCommit", cx->mutationsPerCommit.max())
.detail("MeanBytesPerCommit", cx->bytesPerCommit.mean())
.detail("MedianBytesPerCommit", cx->bytesPerCommit.median())
.detail("MaxBytesPerCommit", cx->bytesPerCommit.max());
.detail("MaxBytesPerCommit", cx->bytesPerCommit.max())
.detail("NumLocalityCacheEntries", cx->locationCache.size());
cx->latencies.clear();
cx->readLatencies.clear();
@ -669,19 +672,82 @@ ACTOR static Future<Void> clientStatusUpdateActor(DatabaseContext* cx) {
}
}
ACTOR static Future<Void> monitorProxiesChange(Reference<AsyncVar<ClientDBInfo> const> clientDBInfo,
ACTOR Future<Void> assertFailure(GrvProxyInterface remote, Future<ErrorOr<GetReadVersionReply>> reply) {
try {
ErrorOr<GetReadVersionReply> res = wait(reply);
if (!res.isError()) {
TraceEvent(SevError, "GotStaleReadVersion")
.detail("Remote", remote.getConsistentReadVersion.getEndpoint().addresses.address.toString())
.detail("Provisional", remote.provisional)
.detail("ReadVersion", res.get().version);
ASSERT_WE_THINK(false);
}
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled) {
throw;
}
// we want this to fail -- so getting here is good, we'll just ignore the error.
}
return Void();
}
Future<Void> attemptGRVFromOldProxies(std::vector<GrvProxyInterface> oldProxies,
std::vector<GrvProxyInterface> newProxies) {
Span span(deterministicRandom()->randomUniqueID(), "VerifyCausalReadRisky"_loc);
std::vector<Future<Void>> replies;
replies.reserve(oldProxies.size());
GetReadVersionRequest req(
span.context, 1, TransactionPriority::IMMEDIATE, GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY);
TraceEvent evt("AttemptGRVFromOldProxies");
evt.detail("NumOldProxies", oldProxies.size()).detail("NumNewProxies", newProxies.size());
auto traceProxies = [&](std::vector<GrvProxyInterface>& proxies, std::string const& key) {
for (int i = 0; i < proxies.size(); ++i) {
auto k = key + std::to_string(i);
evt.detail(k.c_str(), proxies[i].id());
}
};
traceProxies(oldProxies, "OldProxy"s);
traceProxies(newProxies, "NewProxy"s);
evt.log();
for (auto& i : oldProxies) {
req.reply = ReplyPromise<GetReadVersionReply>();
replies.push_back(assertFailure(i, i.getConsistentReadVersion.tryGetReply(req)));
}
return waitForAll(replies);
}
ACTOR static Future<Void> monitorProxiesChange(DatabaseContext* cx,
Reference<AsyncVar<ClientDBInfo> const> clientDBInfo,
AsyncTrigger* triggerVar) {
state std::vector<CommitProxyInterface> curCommitProxies;
state std::vector<GrvProxyInterface> curGrvProxies;
state ActorCollection actors(false);
curCommitProxies = clientDBInfo->get().commitProxies;
curGrvProxies = clientDBInfo->get().grvProxies;
loop {
wait(clientDBInfo->onChange());
if (clientDBInfo->get().commitProxies != curCommitProxies || clientDBInfo->get().grvProxies != curGrvProxies) {
curCommitProxies = clientDBInfo->get().commitProxies;
curGrvProxies = clientDBInfo->get().grvProxies;
triggerVar->trigger();
choose {
when(wait(clientDBInfo->onChange())) {
if (clientDBInfo->get().commitProxies != curCommitProxies ||
clientDBInfo->get().grvProxies != curGrvProxies) {
// This condition is a bit complicated. Here we want to verify that we're unable to receive a read
// version from a proxy of an old generation after a successful recovery. The conditions are:
// 1. We only do this with a configured probability.
// 2. If the old set of Grv proxies is empty, there's nothing to do
// 3. If the new set of Grv proxies is empty, it means the recovery is not complete. So if an old
// Grv proxy still gives out read versions, this would be correct behavior.
// 4. If we see a provisional proxy, it means the recovery didn't complete yet, so the same as (3)
// applies.
if (deterministicRandom()->random01() < cx->verifyCausalReadsProp && !curGrvProxies.empty() &&
!clientDBInfo->get().grvProxies.empty() && !clientDBInfo->get().grvProxies[0].provisional) {
actors.add(attemptGRVFromOldProxies(curGrvProxies, clientDBInfo->get().grvProxies));
}
curCommitProxies = clientDBInfo->get().commitProxies;
curGrvProxies = clientDBInfo->get().grvProxies;
triggerVar->trigger();
}
}
when(wait(actors.getResult())) { UNSTOPPABLE_ASSERT(false); }
}
}
}
@ -963,14 +1029,14 @@ void DatabaseContext::registerSpecialKeySpaceModule(SpecialKeySpace::MODULE modu
specialKeySpaceModules.push_back(std::move(impl));
}
ACTOR Future<RangeResult> getWorkerInterfaces(Reference<ClusterConnectionFile> clusterFile);
ACTOR Future<RangeResult> getWorkerInterfaces(Reference<IClusterConnectionRecord> clusterRecord);
ACTOR Future<Optional<Value>> getJSON(Database db);
struct WorkerInterfacesSpecialKeyImpl : SpecialKeyRangeReadImpl {
Future<RangeResult> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override {
if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionFile()) {
if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionRecord()) {
Key prefix = Key(getKeyRange().begin);
return map(getWorkerInterfaces(ryw->getDatabase()->getConnectionFile()),
return map(getWorkerInterfaces(ryw->getDatabase()->getConnectionRecord()),
[prefix = prefix, kr = KeyRange(kr)](const RangeResult& in) {
RangeResult result;
for (const auto& [k_, v] : in) {
@ -1102,7 +1168,7 @@ Future<RangeResult> HealthMetricsRangeImpl::getRange(ReadYourWritesTransaction*
return healthMetricsGetRangeActor(ryw, kr);
}
DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile,
DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnectionRecord>>> connectionRecord,
Reference<AsyncVar<ClientDBInfo>> clientInfo,
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
Future<Void> clientInfoMonitor,
@ -1113,7 +1179,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
IsInternal internal,
int apiVersion,
IsSwitchable switchable)
: lockAware(lockAware), switchable(switchable), connectionFile(connectionFile), proxyProvisional(false),
: lockAware(lockAware), switchable(switchable), connectionRecord(connectionRecord), proxyProvisional(false),
clientLocality(clientLocality), enableLocalityLoadBalance(enableLocalityLoadBalance), internal(internal),
cc("TransactionMetrics"), transactionReadVersions("ReadVersions", cc),
transactionReadVersionsThrottled("ReadVersionsThrottled", cc),
@ -1167,7 +1233,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
getValueSubmitted.init(LiteralStringRef("NativeAPI.GetValueSubmitted"));
getValueCompleted.init(LiteralStringRef("NativeAPI.GetValueCompleted"));
monitorProxiesInfoChange = monitorProxiesChange(clientInfo, &proxiesChangeTrigger);
monitorProxiesInfoChange = monitorProxiesChange(this, clientInfo, &proxiesChangeTrigger);
tssMismatchHandler = handleTssMismatches(this);
clientStatusUpdater.actor = clientStatusUpdateActor(this);
cacheListMonitor = monitorCacheList(this);
@ -1319,7 +1385,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
std::make_unique<SingleSpecialKeyImpl>(LiteralStringRef("\xff\xff/status/json"),
[](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
if (ryw->getDatabase().getPtr() &&
ryw->getDatabase()->getConnectionFile()) {
ryw->getDatabase()->getConnectionRecord()) {
++ryw->getDatabase()->transactionStatusRequests;
return getJSON(ryw->getDatabase());
} else {
@ -1333,8 +1399,9 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
LiteralStringRef("\xff\xff/cluster_file_path"),
[](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
try {
if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionFile()) {
Optional<Value> output = StringRef(ryw->getDatabase()->getConnectionFile()->getFilename());
if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionRecord()) {
Optional<Value> output =
StringRef(ryw->getDatabase()->getConnectionRecord()->getLocation());
return output;
}
} catch (Error& e) {
@ -1350,8 +1417,8 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
LiteralStringRef("\xff\xff/connection_string"),
[](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
try {
if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionFile()) {
Reference<ClusterConnectionFile> f = ryw->getDatabase()->getConnectionFile();
if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionRecord()) {
Reference<IClusterConnectionRecord> f = ryw->getDatabase()->getConnectionRecord();
Optional<Value> output = StringRef(f->getConnectionString().toString());
return output;
}
@ -1411,7 +1478,7 @@ Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo,
LockAware lockAware,
int apiVersion,
IsSwitchable switchable) {
return Database(new DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionFile>>>(),
return Database(new DatabaseContext(Reference<AsyncVar<Reference<IClusterConnectionRecord>>>(),
clientInfo,
makeReference<AsyncVar<Optional<ClientLeaderRegInterface>>>(),
clientInfoMonitor,
@ -1610,6 +1677,9 @@ void DatabaseContext::setOption(FDBDatabaseOptions::Option option, Optional<Stri
validateOptionValueNotPresent(value);
useConfigDatabase = true;
break;
case FDBDatabaseOptions::TEST_CAUSAL_READ_RISKY:
verifyCausalReadsProp = double(extractIntOption(value, 0, 100)) / 100.0;
break;
default:
break;
}
@ -1632,11 +1702,12 @@ Future<Void> DatabaseContext::onConnected() {
return connected;
}
ACTOR static Future<Void> switchConnectionFileImpl(Reference<ClusterConnectionFile> connFile, DatabaseContext* self) {
ACTOR static Future<Void> switchConnectionRecordImpl(Reference<IClusterConnectionRecord> connRecord,
DatabaseContext* self) {
TEST(true); // Switch connection file
TraceEvent("SwitchConnectionFile")
.detail("ConnectionFile", connFile->canGetFilename() ? connFile->getFilename() : "")
.detail("ConnectionString", connFile->getConnectionString().toString());
TraceEvent("SwitchConnectionRecord")
.detail("ClusterFile", connRecord->toString())
.detail("ConnectionString", connRecord->getConnectionString().toString());
// Reset state from former cluster.
self->commitProxies.clear();
@ -1649,38 +1720,38 @@ ACTOR static Future<Void> switchConnectionFileImpl(Reference<ClusterConnectionFi
clearedClientInfo.grvProxies.clear();
clearedClientInfo.id = deterministicRandom()->randomUniqueID();
self->clientInfo->set(clearedClientInfo);
self->connectionFile->set(connFile);
self->connectionRecord->set(connRecord);
state Database db(Reference<DatabaseContext>::addRef(self));
state Transaction tr(db);
loop {
tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
try {
TraceEvent("SwitchConnectionFileAttemptingGRV").log();
TraceEvent("SwitchConnectionRecordAttemptingGRV").log();
Version v = wait(tr.getReadVersion());
TraceEvent("SwitchConnectionFileGotRV")
TraceEvent("SwitchConnectionRecordGotRV")
.detail("ReadVersion", v)
.detail("MinAcceptableReadVersion", self->minAcceptableReadVersion);
ASSERT(self->minAcceptableReadVersion != std::numeric_limits<Version>::max());
self->connectionFileChangedTrigger.trigger();
return Void();
} catch (Error& e) {
TraceEvent("SwitchConnectionFileError").detail("Error", e.what());
TraceEvent("SwitchConnectionRecordError").detail("Error", e.what());
wait(tr.onError(e));
}
}
}
Reference<ClusterConnectionFile> DatabaseContext::getConnectionFile() {
if (connectionFile) {
return connectionFile->get();
Reference<IClusterConnectionRecord> DatabaseContext::getConnectionRecord() {
if (connectionRecord) {
return connectionRecord->get();
}
return Reference<ClusterConnectionFile>();
return Reference<IClusterConnectionRecord>();
}
Future<Void> DatabaseContext::switchConnectionFile(Reference<ClusterConnectionFile> standby) {
Future<Void> DatabaseContext::switchConnectionRecord(Reference<IClusterConnectionRecord> standby) {
ASSERT(switchable);
return switchConnectionFileImpl(standby, this);
return switchConnectionRecordImpl(standby, this);
}
Future<Void> DatabaseContext::connectionFileChanged() {
@ -1705,7 +1776,7 @@ extern IPAddress determinePublicIPAutomatically(ClusterConnectionString const& c
// Creates a database object that represents a connection to a cluster
// This constructor uses a preallocated DatabaseContext that may have been created
// on another thread
Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord,
int apiVersion,
IsInternal internal,
LocalityData const& clientLocality,
@ -1713,13 +1784,13 @@ Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
if (!g_network)
throw network_not_setup();
if (connFile) {
if (connRecord) {
if (networkOptions.traceDirectory.present() && !traceFileIsOpen()) {
g_network->initMetrics();
FlowTransport::transport().initMetrics();
initTraceEventMetrics();
auto publicIP = determinePublicIPAutomatically(connFile->getConnectionString());
auto publicIP = determinePublicIPAutomatically(connRecord->getConnectionString());
selectTraceFormatter(networkOptions.traceFormat);
selectTraceClockSource(networkOptions.traceClockSource);
openTraceFile(NetworkAddress(publicIP, ::getpid()),
@ -1735,8 +1806,8 @@ Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
.detail("SourceVersion", getSourceVersion())
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detail("ClusterFile", connFile->getFilename().c_str())
.detail("ConnectionString", connFile->getConnectionString().toString())
.detail("ClusterFile", connRecord->toString())
.detail("ConnectionString", connRecord->getConnectionString().toString())
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ApiVersion", apiVersion)
.detailf("ImageOffset", "%p", platform::getImageOffset())
@ -1753,9 +1824,9 @@ Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
auto clientInfo = makeReference<AsyncVar<ClientDBInfo>>();
auto coordinator = makeReference<AsyncVar<Optional<ClientLeaderRegInterface>>>();
auto connectionFile = makeReference<AsyncVar<Reference<ClusterConnectionFile>>>();
connectionFile->set(connFile);
Future<Void> clientInfoMonitor = monitorProxies(connectionFile,
auto connectionRecord = makeReference<AsyncVar<Reference<IClusterConnectionRecord>>>();
connectionRecord->set(connRecord);
Future<Void> clientInfoMonitor = monitorProxies(connectionRecord,
clientInfo,
coordinator,
networkOptions.supportedVersions,
@ -1763,7 +1834,7 @@ Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
DatabaseContext* db;
if (preallocatedDb) {
db = new (preallocatedDb) DatabaseContext(connectionFile,
db = new (preallocatedDb) DatabaseContext(connectionRecord,
clientInfo,
coordinator,
clientInfoMonitor,
@ -1775,7 +1846,7 @@ Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
apiVersion,
IsSwitchable::True);
} else {
db = new DatabaseContext(connectionFile,
db = new DatabaseContext(connectionRecord,
clientInfo,
coordinator,
clientInfoMonitor,
@ -1800,9 +1871,9 @@ Database Database::createDatabase(std::string connFileName,
int apiVersion,
IsInternal internal,
LocalityData const& clientLocality) {
Reference<ClusterConnectionFile> rccf = Reference<ClusterConnectionFile>(
Reference<IClusterConnectionRecord> rccr = Reference<IClusterConnectionRecord>(
new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFileName).first));
return Database::createDatabase(rccf, apiVersion, internal, clientLocality);
return Database::createDatabase(rccr, apiVersion, internal, clientLocality);
}
Reference<WatchMetadata> DatabaseContext::getWatchMetadata(KeyRef key) const {
@ -2097,6 +2168,7 @@ void stopNetwork() {
if (!g_network)
throw network_not_setup();
TraceEvent("ClientStopNetwork");
g_network->stop();
closeTraceFile();
}
@ -2763,7 +2835,7 @@ ACTOR Future<Version> watchValue(Future<Version> version,
TaskPriority::DefaultPromiseEndpoint))) {
resp = r;
}
when(wait(cx->connectionFile ? cx->connectionFile->onChange() : Never())) { wait(Never()); }
when(wait(cx->connectionRecord ? cx->connectionRecord->onChange() : Never())) { wait(Never()); }
}
if (info.debugID.present()) {
g_traceBatch.addEvent("WatchValueDebug",
@ -4133,11 +4205,14 @@ void debugAddTags(Transaction* tr) {
}
SpanID generateSpanID(int transactionTracingEnabled) {
uint64_t tid = deterministicRandom()->randomUInt64();
uint64_t txnId = deterministicRandom()->randomUInt64();
if (transactionTracingEnabled > 0) {
return SpanID(tid, deterministicRandom()->randomUInt64());
uint64_t tokenId = deterministicRandom()->random01() <= FLOW_KNOBS->TRACING_SAMPLE_RATE
? deterministicRandom()->randomUInt64()
: 0;
return SpanID(txnId, tokenId);
} else {
return SpanID(tid, 0);
return SpanID(txnId, 0);
}
}
@ -5795,7 +5870,7 @@ Future<Version> Transaction::getReadVersion(uint32_t flags) {
}
Location location = "NAPI:getReadVersion"_loc;
UID spanContext = deterministicRandom()->randomUniqueID();
UID spanContext = generateSpanID(cx->transactionTracingEnabled);
auto const req = DatabaseContext::VersionRequest(spanContext, options.tags, info.debugID);
batcher.stream.send(req);
startTime = now();
@ -6158,7 +6233,7 @@ ACTOR Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(Databa
StorageMetrics permittedError,
int shardLimit,
int expectedShardCount) {
state Span span("NAPI:WaitStorageMetrics"_loc);
state Span span("NAPI:WaitStorageMetrics"_loc, generateSpanID(cx->transactionTracingEnabled));
loop {
std::vector<std::pair<KeyRange, Reference<LocationInfo>>> locations =
wait(getKeyRangeLocations(cx,
@ -6466,7 +6541,7 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
throw;
}
TraceEvent("ExclusionSafetyCheckCoordinators").log();
state ClientCoordinators coordinatorList(cx->getConnectionFile());
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
@ -6545,9 +6620,9 @@ ACTOR static Future<int64_t> rebootWorkerActor(DatabaseContext* cx, ValueRef add
duration = 0;
// fetch the addresses of all workers
state std::map<Key, std::pair<Value, ClientLeaderRegInterface>> address_interface;
if (!cx->getConnectionFile())
if (!cx->getConnectionRecord())
return 0;
RangeResult kvs = wait(getWorkerInterfaces(cx->getConnectionFile()));
RangeResult kvs = wait(getWorkerInterfaces(cx->getConnectionRecord()));
ASSERT(!kvs.more);
// Note: reuse this knob from fdbcli, change it if necessary
Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
@ -6569,7 +6644,7 @@ Future<int64_t> DatabaseContext::rebootWorker(StringRef addr, bool check, int du
}
Future<Void> DatabaseContext::forceRecoveryWithDataLoss(StringRef dcId) {
return forceRecovery(getConnectionFile(), dcId);
return forceRecovery(getConnectionRecord(), dcId);
}
ACTOR static Future<Void> createSnapshotActor(DatabaseContext* cx, UID snapUID, StringRef snapCmd) {

View File

@ -83,7 +83,7 @@ public:
// Creates a database object that represents a connection to a cluster
// This constructor uses a preallocated DatabaseContext that may have been created
// on another thread
static Database createDatabase(Reference<ClusterConnectionFile> connFile,
static Database createDatabase(Reference<IClusterConnectionRecord> connRecord,
int apiVersion,
IsInternal internal = IsInternal::True,
LocalityData const& clientLocality = LocalityData(),

View File

@ -58,6 +58,11 @@ class CommitQuorum {
wait(retryBrokenPromise(cti.commit, self->getCommitRequest(generation)));
++self->successful;
} catch (Error& e) {
// self might be destroyed if this actor is canceled
if (e.code() == error_code_actor_cancelled) {
throw;
}
if (e.code() == error_code_not_committed) {
++self->failed;
} else {
@ -312,7 +317,7 @@ public:
Future<Void> commit() { return commit(this); }
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionFile()->getConnectionString().coordinators();
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
ctis.reserve(coordinators.size());
for (const auto& coordinator : coordinators) {
ctis.emplace_back(coordinator);

View File

@ -1336,9 +1336,9 @@ ACTOR Future<Optional<Value>> getJSON(Database db) {
return getValueFromJSON(statusObj);
}
ACTOR Future<RangeResult> getWorkerInterfaces(Reference<ClusterConnectionFile> clusterFile) {
ACTOR Future<RangeResult> getWorkerInterfaces(Reference<IClusterConnectionRecord> connRecord) {
state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);
state Future<Void> leaderMon = monitorLeader<ClusterInterface>(clusterFile, clusterInterface);
state Future<Void> leaderMon = monitorLeader<ClusterInterface>(connRecord, clusterInterface);
loop {
choose {
@ -1371,7 +1371,7 @@ Future<Optional<Value>> ReadYourWritesTransaction::get(const Key& key, Snapshot
}
} else {
if (key == LiteralStringRef("\xff\xff/status/json")) {
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionRecord()) {
++tr.getDatabase()->transactionStatusRequests;
return getJSON(tr.getDatabase());
} else {
@ -1381,8 +1381,8 @@ Future<Optional<Value>> ReadYourWritesTransaction::get(const Key& key, Snapshot
if (key == LiteralStringRef("\xff\xff/cluster_file_path")) {
try {
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
Optional<Value> output = StringRef(tr.getDatabase()->getConnectionFile()->getFilename());
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionRecord()) {
Optional<Value> output = StringRef(tr.getDatabase()->getConnectionRecord()->getLocation());
return output;
}
} catch (Error& e) {
@ -1393,8 +1393,8 @@ Future<Optional<Value>> ReadYourWritesTransaction::get(const Key& key, Snapshot
if (key == LiteralStringRef("\xff\xff/connection_string")) {
try {
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
Reference<ClusterConnectionFile> f = tr.getDatabase()->getConnectionFile();
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionRecord()) {
Reference<IClusterConnectionRecord> f = tr.getDatabase()->getConnectionRecord();
Optional<Value> output = StringRef(f->getConnectionString().toString());
return output;
}
@ -1454,8 +1454,8 @@ Future<RangeResult> ReadYourWritesTransaction::getRange(KeySelector begin,
}
} else {
if (begin.getKey() == LiteralStringRef("\xff\xff/worker_interfaces")) {
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
return getWorkerInterfaces(tr.getDatabase()->getConnectionFile());
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionRecord()) {
return getWorkerInterfaces(tr.getDatabase()->getConnectionRecord());
} else {
return RangeResult();
}

View File

@ -55,7 +55,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( BUGGIFY_RECOVER_MEMORY_LIMIT, 1e6 );
init( BUGGIFY_WORKER_REMOVED_MAX_LAG, 30 );
init( UPDATE_STORAGE_BYTE_LIMIT, 1e6 );
init( TLOG_PEEK_DELAY, 0.00005 );
init( TLOG_PEEK_DELAY, 0.0005 );
init( LEGACY_TLOG_UPGRADE_ENTRIES_PER_VERSION, 100 );
init( VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS, 1072 ); // Based on a naive interpretation of the gcc version of std::deque, we would expect this to be 16 bytes overhead per 512 bytes data. In practice, it seems to be 24 bytes overhead per 512.
init( VERSION_MESSAGES_ENTRY_BYTES_WITH_OVERHEAD, std::ceil(16.0 * VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS / 1024) );
@ -64,7 +64,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( TLOG_MESSAGE_BLOCK_BYTES, 10e6 );
init( TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR, double(TLOG_MESSAGE_BLOCK_BYTES) / (TLOG_MESSAGE_BLOCK_BYTES - MAX_MESSAGE_SIZE) ); //1.0121466709838096006362758832473
init( PEEK_TRACKER_EXPIRATION_TIME, 600 ); if( randomize && BUGGIFY ) PEEK_TRACKER_EXPIRATION_TIME = deterministicRandom()->coinflip() ? 0.1 : 120;
init( PEEK_USING_STREAMING, true );
init( PEEK_USING_STREAMING, true ); if( randomize && BUGGIFY ) PEEK_USING_STREAMING = false;
init( PARALLEL_GET_MORE_REQUESTS, 32 ); if( randomize && BUGGIFY ) PARALLEL_GET_MORE_REQUESTS = 2;
init( MULTI_CURSOR_PRE_FETCH_LIMIT, 10 );
init( MAX_QUEUE_COMMIT_BYTES, 15e6 ); if( randomize && BUGGIFY ) MAX_QUEUE_COMMIT_BYTES = 5000;
@ -454,6 +454,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( SIM_SHUTDOWN_TIMEOUT, 10 );
init( SHUTDOWN_TIMEOUT, 600 ); if( randomize && BUGGIFY ) SHUTDOWN_TIMEOUT = 60.0;
init( MASTER_SPIN_DELAY, 1.0 ); if( randomize && BUGGIFY ) MASTER_SPIN_DELAY = 10.0;
init( CC_PRUNE_CLIENTS_INTERVAL, 60.0 );
init( CC_CHANGE_DELAY, 0.1 );
init( CC_CLASS_DELAY, 0.01 );
init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 1.0 );
@ -533,6 +534,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( TARGET_BYTES_PER_STORAGE_SERVER_BATCH, 750e6 ); if( smallStorageTarget ) TARGET_BYTES_PER_STORAGE_SERVER_BATCH = 1500e3;
init( SPRING_BYTES_STORAGE_SERVER_BATCH, 100e6 ); if( smallStorageTarget ) SPRING_BYTES_STORAGE_SERVER_BATCH = 150e3;
init( STORAGE_HARD_LIMIT_BYTES, 1500e6 ); if( smallStorageTarget ) STORAGE_HARD_LIMIT_BYTES = 4500e3;
init( STORAGE_HARD_LIMIT_BYTES_OVERAGE, 5000e3 ); if( smallStorageTarget ) STORAGE_HARD_LIMIT_BYTES_OVERAGE = 100e3; // byte+version overage ensures storage server makes enough progress on freeing up storage queue memory at hard limit by ensuring it advances desiredOldestVersion enough per commit cycle.
init( STORAGE_HARD_LIMIT_VERSION_OVERAGE, VERSIONS_PER_SECOND / 4.0 );
init( STORAGE_DURABILITY_LAG_HARD_MAX, 2000e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_HARD_MAX = 100e6;
init( STORAGE_DURABILITY_LAG_SOFT_MAX, 250e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_SOFT_MAX = 10e6;
@ -740,6 +743,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( FASTRESTORE_EXPENSIVE_VALIDATION, false ); if( randomize && BUGGIFY ) { FASTRESTORE_EXPENSIVE_VALIDATION = deterministicRandom()->random01() < 0.5 ? true : false;}
init( FASTRESTORE_WRITE_BW_MB, 70 ); if( randomize && BUGGIFY ) { FASTRESTORE_WRITE_BW_MB = deterministicRandom()->random01() < 0.5 ? 2 : 100;}
init( FASTRESTORE_RATE_UPDATE_SECONDS, 1.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_RATE_UPDATE_SECONDS = deterministicRandom()->random01() < 0.5 ? 0.1 : 2;}
init( FASTRESTORE_DUMP_INSERT_RANGE_VERSION, false );
init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 );
init( REDWOOD_DEFAULT_EXTENT_SIZE, 32 * 1024 * 1024 );

View File

@ -377,6 +377,7 @@ public:
double SIM_SHUTDOWN_TIMEOUT;
double SHUTDOWN_TIMEOUT;
double MASTER_SPIN_DELAY;
double CC_PRUNE_CLIENTS_INTERVAL;
double CC_CHANGE_DELAY;
double CC_CLASS_DELAY;
double WAIT_FOR_GOOD_RECRUITMENT_DELAY;
@ -470,6 +471,8 @@ public:
int64_t TARGET_BYTES_PER_STORAGE_SERVER_BATCH;
int64_t SPRING_BYTES_STORAGE_SERVER_BATCH;
int64_t STORAGE_HARD_LIMIT_BYTES;
int64_t STORAGE_HARD_LIMIT_BYTES_OVERAGE;
int64_t STORAGE_HARD_LIMIT_VERSION_OVERAGE;
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
@ -688,6 +691,8 @@ public:
bool FASTRESTORE_EXPENSIVE_VALIDATION; // when set true, performance will be heavily affected
double FASTRESTORE_WRITE_BW_MB; // target aggregated write bandwidth from all appliers
double FASTRESTORE_RATE_UPDATE_SECONDS; // how long to update appliers target write rate
bool FASTRESTORE_DUMP_INSERT_RANGE_VERSION; // Dump all the range version after insertion. This is for debugging
// purpose.
int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files
int REDWOOD_DEFAULT_EXTENT_SIZE; // Extent size for new Redwood files

View File

@ -126,7 +126,7 @@ class SimpleConfigTransactionImpl {
public:
SimpleConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionFile()->getConnectionString().coordinators();
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
std::sort(coordinators.begin(), coordinators.end());
cti = ConfigTransactionInterface(coordinators[0]);
}

View File

@ -27,6 +27,7 @@
#include <exception>
#include "fdbclient/ActorLineageProfiler.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/ProcessInterface.h"
#include "fdbclient/GlobalConfig.actor.h"
@ -1590,8 +1591,7 @@ CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const {
RangeResult result;
KeyRef prefix(getKeyRange().begin);
// the constructor of ClusterConnectionFile already checks whether the file is valid
auto cs = ClusterConnectionFile(ryw->getDatabase()->getConnectionFile()->getFilename()).getConnectionString();
auto cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
auto coordinator_processes = cs.coordinators();
Key cluster_decription_key = prefix.withSuffix(LiteralStringRef("cluster_description"));
if (kr.contains(cluster_decription_key)) {
@ -1737,7 +1737,10 @@ ACTOR static Future<RangeResult> CoordinatorsAutoImplActor(ReadYourWritesTransac
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
std::vector<NetworkAddress> _desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators(
&tr, old.coordinators(), Reference<ClusterConnectionFile>(new ClusterConnectionFile(old)), result));
&tr,
old.coordinators(),
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
if (result == CoordinatorsResult::NOT_ENOUGH_MACHINES) {
// we could get not_enough_machines if we happen to see the database while the cluster controller is updating

View File

@ -302,11 +302,11 @@ void JSONDoc::mergeValueInto(json_spirit::mValue& dst, const json_spirit::mValue
// Check if a quorum of coordination servers is reachable
// Will not throw, will just return non-present Optional if error
ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<ClusterConnectionFile> f,
ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<IClusterConnectionRecord> connRecord,
bool* quorum_reachable,
int* coordinatorsFaultTolerance) {
try {
state ClientCoordinators coord(f);
state ClientCoordinators coord(connRecord);
state StatusObject statusObj;
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
@ -365,14 +365,16 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C
// Client section of the json output
// Will NOT throw, errors will be put into messages array
ACTOR Future<StatusObject> clientStatusFetcher(Reference<ClusterConnectionFile> f,
ACTOR Future<StatusObject> clientStatusFetcher(Reference<IClusterConnectionRecord> connRecord,
StatusArray* messages,
bool* quorum_reachable,
int* coordinatorsFaultTolerance) {
state StatusObject statusObj;
Optional<StatusObject> coordsStatusObj =
wait(clientCoordinatorsStatusFetcher(f, quorum_reachable, coordinatorsFaultTolerance));
state Optional<StatusObject> coordsStatusObj =
wait(clientCoordinatorsStatusFetcher(connRecord, quorum_reachable, coordinatorsFaultTolerance));
state bool contentsUpToDate = wait(connRecord->upToDate());
if (coordsStatusObj.present()) {
statusObj["coordinators"] = coordsStatusObj.get();
if (!*quorum_reachable)
@ -381,17 +383,17 @@ ACTOR Future<StatusObject> clientStatusFetcher(Reference<ClusterConnectionFile>
messages->push_back(makeMessage("status_incomplete_coordinators", "Could not fetch coordinator info."));
StatusObject statusObjClusterFile;
statusObjClusterFile["path"] = f->getFilename();
bool contentsUpToDate = f->fileContentsUpToDate();
statusObjClusterFile["path"] = connRecord->getLocation();
statusObjClusterFile["up_to_date"] = contentsUpToDate;
statusObj["cluster_file"] = statusObjClusterFile;
if (!contentsUpToDate) {
ClusterConnectionString storedConnectionString = wait(connRecord->getStoredConnectionString());
std::string description = "Cluster file contents do not match current cluster connection string.";
description += "\nThe file contains the connection string: ";
description += ClusterConnectionFile(f->getFilename()).getConnectionString().toString().c_str();
description += storedConnectionString.toString().c_str();
description += "\nThe current connection string is: ";
description += f->getConnectionString().toString().c_str();
description += connRecord->getConnectionString().toString().c_str();
description += "\nVerify the cluster file and its parent directory are writable and that the cluster file has "
"not been overwritten externally. To change coordinators without manual intervention, the "
"cluster file and its containing folder must be writable by all servers and clients. If a "
@ -491,7 +493,7 @@ StatusObject getClientDatabaseStatus(StatusObjectReader client, StatusObjectRead
return databaseStatus;
}
ACTOR Future<StatusObject> statusFetcherImpl(Reference<ClusterConnectionFile> f,
ACTOR Future<StatusObject> statusFetcherImpl(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface) {
if (!g_network)
throw network_not_setup();
@ -508,7 +510,7 @@ ACTOR Future<StatusObject> statusFetcherImpl(Reference<ClusterConnectionFile> f,
state int64_t clientTime = g_network->timer();
StatusObject _statusObjClient =
wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance));
wait(clientStatusFetcher(connRecord, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance));
statusObjClient = _statusObjClient;
if (clientTime != -1)
@ -598,7 +600,7 @@ ACTOR Future<StatusObject> statusFetcherImpl(Reference<ClusterConnectionFile> f,
}
ACTOR Future<Void> timeoutMonitorLeader(Database db) {
state Future<Void> leadMon = monitorLeader<ClusterInterface>(db->getConnectionFile(), db->statusClusterInterface);
state Future<Void> leadMon = monitorLeader<ClusterInterface>(db->getConnectionRecord(), db->statusClusterInterface);
loop {
wait(delay(CLIENT_KNOBS->STATUS_IDLE_TIMEOUT + 0.00001 + db->lastStatusFetch - now()));
if (now() - db->lastStatusFetch > CLIENT_KNOBS->STATUS_IDLE_TIMEOUT) {
@ -615,5 +617,5 @@ Future<StatusObject> StatusClient::statusFetcher(Database db) {
db->statusLeaderMon = timeoutMonitorLeader(db);
}
return statusFetcherImpl(db->getConnectionFile(), db->statusClusterInterface);
return statusFetcherImpl(db->getConnectionRecord(), db->statusClusterInterface);
}

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/ThreadSafeTransaction.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/versions.h"
@ -443,7 +444,14 @@ void ThreadSafeApi::runNetwork() {
try {
::runNetwork();
} catch (Error& e) {
TraceEvent(SevError, "RunNetworkError").error(e);
runErr = e;
} catch (std::exception& e) {
runErr = unknown_error();
TraceEvent(SevError, "RunNetworkError").error(unknown_error()).detail("RootException", e.what());
} catch (...) {
runErr = unknown_error();
TraceEvent(SevError, "RunNetworkError").error(unknown_error());
}
for (auto& hook : threadCompletionHooks) {
@ -451,6 +459,8 @@ void ThreadSafeApi::runNetwork() {
hook.first(hook.second);
} catch (Error& e) {
TraceEvent(SevError, "NetworkShutdownHookError").error(e);
} catch (std::exception& e) {
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error()).detail("RootException", e.what());
} catch (...) {
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error());
}
@ -459,6 +469,8 @@ void ThreadSafeApi::runNetwork() {
if (runErr.present()) {
throw runErr.get();
}
TraceEvent("RunNetworkTerminating");
}
void ThreadSafeApi::stopNetwork() {

View File

@ -200,6 +200,8 @@ description is not currently required but encouraged.
defaultFor="1100"/>
<Option name="use_config_database" code="800"
description="Use configuration database." />
<Option name="test_causal_read_risky" code="900"
description="An integer between 0 and 100 (default is 0) expressing the probability that a client will verify it can't read stale data whenever it detects a recovery." />
</Scope>
<Scope name="TransactionOption">

View File

@ -457,13 +457,13 @@ private:
void const* data,
int length,
int64_t offset) {
state Standalone<StringRef> dataCopy(StringRef((uint8_t*)data, length));
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait(g_simulator.onMachine(currentProcess));
state double delayDuration =
g_simulator.speedUpSimulation ? 0.0001 : (deterministicRandom()->random01() * self->maxWriteDelay);
state Standalone<StringRef> dataCopy(StringRef((uint8_t*)data, length));
state Future<bool> startSyncFuture = self->startSyncPromise.getFuture();

View File

@ -210,6 +210,7 @@ public:
void operator=(RangeMap&& r) noexcept { map = std::move(r.map); }
// void clear( const Val& value ) { ranges.clear(); ranges.insert(std::make_pair(Key(),value)); }
void clear() { map.clear(); }
void insert(const Range& keys, const Val& value);

View File

@ -833,6 +833,7 @@ private:
if (Optional<Value> tagV = txnStateStore->readValue(serverTagKeyFor(ssId)).get(); tagV.present()) {
MutationRef privatized = m;
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
privatized.param2 = m.param2.withPrefix(systemKeys.begin, arena);
toCommit->addTag(decodeServerTagValue(tagV.get()));
toCommit->writeTypedMessage(privatized);
}
@ -858,6 +859,7 @@ private:
MutationRef privatized = m;
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
privatized.param2 = m.param2.withPrefix(systemKeys.begin, arena);
toCommit->addTag(decodeServerTagValue(tagV.get()));
toCommit->writeTypedMessage(privatized);
}
@ -1037,4 +1039,4 @@ void applyMetadataMutations(SpanID const& spanContext,
const VectorRef<MutationRef>& mutations,
IKeyValueStore* txnStateStore) {
ApplyMetadataMutationsImpl(spanContext, dbgid, arena, mutations, txnStateStore).apply();
}
}

View File

@ -170,6 +170,7 @@ set(FDBSERVER_SRCS
workloads/CpuProfiler.actor.cpp
workloads/Cycle.actor.cpp
workloads/DataDistributionMetrics.actor.cpp
workloads/DataLossRecovery.actor.cpp
workloads/DDBalance.actor.cpp
workloads/DDMetrics.actor.cpp
workloads/DDMetricsExclude.actor.cpp

View File

@ -27,6 +27,7 @@
#include "fdbrpc/FailureMonitor.h"
#include "flow/ActorCollection.h"
#include "flow/SystemMonitor.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/BackupInterface.h"
#include "fdbserver/CoordinationInterface.h"
@ -133,6 +134,8 @@ public:
int logGenerations;
bool cachePopulated;
std::map<NetworkAddress, std::pair<double, OpenDatabaseRequest>> clientStatus;
Future<Void> clientCounter;
int clientCount;
DBInfo()
: clientInfo(new AsyncVar<ClientDBInfo>()), serverInfo(new AsyncVar<ServerDBInfo>()),
@ -143,7 +146,9 @@ public:
EnableLocalityLoadBalance::True,
TaskPriority::DefaultEndpoint,
LockAware::True)), // SOMEDAY: Locality!
unfinishedRecoveries(0), logGenerations(0), cachePopulated(false) {}
unfinishedRecoveries(0), logGenerations(0), cachePopulated(false), clientCount(0) {
clientCounter = countClients(this);
}
void setDistributor(const DataDistributorInterface& interf) {
auto newInfo = serverInfo->get();
@ -172,6 +177,22 @@ public:
}
serverInfo->set(newInfo);
}
ACTOR static Future<Void> countClients(DBInfo* self) {
loop {
wait(delay(SERVER_KNOBS->CC_PRUNE_CLIENTS_INTERVAL));
self->clientCount = 0;
for (auto itr = self->clientStatus.begin(); itr != self->clientStatus.end();) {
if (now() - itr->second.first < 2 * SERVER_KNOBS->COORDINATOR_REGISTER_INTERVAL) {
self->clientCount += itr->second.second.clientCount;
++itr;
} else {
itr = self->clientStatus.erase(itr);
}
}
}
}
};
struct UpdateWorkerList {
@ -3174,6 +3195,8 @@ public:
serverInfo.myLocality = locality;
db.serverInfo->set(serverInfo);
cx = openDBOnServer(db.serverInfo, TaskPriority::DefaultEndpoint, LockAware::True);
specialCounter(clusterControllerMetrics, "ClientCount", [this]() { return db.clientCount; });
}
~ClusterControllerData() {
@ -5277,7 +5300,7 @@ ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
}
}
ACTOR Future<Void> clusterController(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
Future<Void> recoveredDiskFiles,
@ -5287,7 +5310,7 @@ ACTOR Future<Void> clusterController(Reference<ClusterConnectionFile> connFile,
state bool hasConnected = false;
loop {
try {
ServerCoordinators coordinators(connFile);
ServerCoordinators coordinators(connRecord);
wait(clusterController(coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
} catch (Error& e) {
if (e.code() != error_code_coordinators_changed)
@ -5306,7 +5329,8 @@ TEST_CASE("/fdbserver/clustercontroller/updateWorkerHealth") {
// Create a testing ClusterControllerData. Most of the internal states do not matter in this test.
state ClusterControllerData data(ClusterControllerFullInterface(),
LocalityData(),
ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile())));
ServerCoordinators(Reference<IClusterConnectionRecord>(
new ClusterConnectionMemoryRecord(ClusterConnectionString()))));
state NetworkAddress workerAddress(IPAddress(0x01010101), 1);
state NetworkAddress badPeer1(IPAddress(0x02020202), 1);
state NetworkAddress badPeer2(IPAddress(0x03030303), 1);
@ -5364,7 +5388,8 @@ TEST_CASE("/fdbserver/clustercontroller/updateRecoveredWorkers") {
// Create a testing ClusterControllerData. Most of the internal states do not matter in this test.
ClusterControllerData data(ClusterControllerFullInterface(),
LocalityData(),
ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile())));
ServerCoordinators(Reference<IClusterConnectionRecord>(
new ClusterConnectionMemoryRecord(ClusterConnectionString()))));
NetworkAddress worker1(IPAddress(0x01010101), 1);
NetworkAddress worker2(IPAddress(0x11111111), 1);
NetworkAddress badPeer1(IPAddress(0x02020202), 1);
@ -5400,7 +5425,8 @@ TEST_CASE("/fdbserver/clustercontroller/getServersWithDegradedLink") {
// Create a testing ClusterControllerData. Most of the internal states do not matter in this test.
ClusterControllerData data(ClusterControllerFullInterface(),
LocalityData(),
ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile())));
ServerCoordinators(Reference<IClusterConnectionRecord>(
new ClusterConnectionMemoryRecord(ClusterConnectionString()))));
NetworkAddress worker(IPAddress(0x01010101), 1);
NetworkAddress badPeer1(IPAddress(0x02020202), 1);
NetworkAddress badPeer2(IPAddress(0x03030303), 1);
@ -5501,7 +5527,8 @@ TEST_CASE("/fdbserver/clustercontroller/recentRecoveryCountDueToHealth") {
// Create a testing ClusterControllerData. Most of the internal states do not matter in this test.
ClusterControllerData data(ClusterControllerFullInterface(),
LocalityData(),
ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile())));
ServerCoordinators(Reference<IClusterConnectionRecord>(
new ClusterConnectionMemoryRecord(ClusterConnectionString()))));
ASSERT_EQ(data.recentRecoveryCountDueToHealth(), 0);
@ -5521,7 +5548,8 @@ TEST_CASE("/fdbserver/clustercontroller/shouldTriggerRecoveryDueToDegradedServer
// Create a testing ClusterControllerData. Most of the internal states do not matter in this test.
ClusterControllerData data(ClusterControllerFullInterface(),
LocalityData(),
ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile())));
ServerCoordinators(Reference<IClusterConnectionRecord>(
new ClusterConnectionMemoryRecord(ClusterConnectionString()))));
NetworkAddress master(IPAddress(0x01010101), 1);
NetworkAddress tlog(IPAddress(0x02020202), 1);
NetworkAddress satelliteTlog(IPAddress(0x03030303), 1);
@ -5625,7 +5653,8 @@ TEST_CASE("/fdbserver/clustercontroller/shouldTriggerFailoverDueToDegradedServer
// Create a testing ClusterControllerData. Most of the internal states do not matter in this test.
ClusterControllerData data(ClusterControllerFullInterface(),
LocalityData(),
ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile())));
ServerCoordinators(Reference<IClusterConnectionRecord>(
new ClusterConnectionMemoryRecord(ClusterConnectionString()))));
NetworkAddress master(IPAddress(0x01010101), 1);
NetworkAddress tlog(IPAddress(0x02020202), 1);
NetworkAddress satelliteTlog(IPAddress(0x03030303), 1);

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbserver/CoordinatedState.h"
#include "fdbserver/CoordinationInterface.h"
#include "fdbserver/Knobs.h"
@ -288,8 +289,7 @@ struct MovableCoordinatedStateImpl {
// reached the point where a leader elected by the new coordinators should be doing the rest of the work
// (and therefore the caller should die).
state CoordinatedState cs(self->coordinators);
state CoordinatedState nccs(
ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile(nc))));
state CoordinatedState nccs(ServerCoordinators(makeReference<ClusterConnectionMemoryRecord>(nc)));
state Future<Void> creationTimeout = delay(30);
ASSERT(self->lastValue.present() && self->lastCSValue.present());
TraceEvent("StartMove").detail("ConnectionString", nc.toString());
@ -306,7 +306,7 @@ struct MovableCoordinatedStateImpl {
when(wait(nccs.setExclusive(
BinaryWriter::toValue(MovableValue(self->lastValue.get(),
MovableValue::MovingFrom,
self->coordinators.ccf->getConnectionString().toString()),
self->coordinators.ccr->getConnectionString().toString()),
IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2()))))) {}
}

View File

@ -95,8 +95,8 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local) : Client
forward.makeWellKnownEndpoint(WLTOKEN_LEADERELECTIONREG_FORWARD, TaskPriority::Coordination);
}
ServerCoordinators::ServerCoordinators(Reference<ClusterConnectionFile> cf) : ClientCoordinators(cf) {
ClusterConnectionString cs = ccf->getConnectionString();
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) {
leaderElectionServers.emplace_back(*s);
stateServers.emplace_back(*s);
@ -588,7 +588,7 @@ StringRef getClusterDescriptor(Key key) {
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
OnDemandStore* pStore,
UID id,
Reference<ClusterConnectionFile> ccf) {
Reference<IClusterConnectionRecord> ccr) {
state LeaderRegisterCollection regs(pStore);
state ActorCollection forwarders(false);
@ -609,12 +609,12 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
info.forward = forward.get().serializedInfo;
req.reply.send(CachedSerialization<ClientDBInfo>(info));
} else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT &&
getClusterDescriptor(req.clusterKey).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
TraceEvent(SevWarn, "CCRMismatch")
.detail("RequestType", "OpenDatabaseCoordRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("LocalCS", ccr->getConnectionString().toString())
.detail("IncomingClusterKey", req.clusterKey)
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
req.reply.sendError(wrong_connection_file());
@ -628,13 +628,13 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
if (forward.present()) {
req.reply.send(forward.get());
} else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
TraceEvent(SevWarn, "CCRMismatch")
.detail("RequestType", "ElectionResultRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("LocalCS", ccr->getConnectionString().toString())
.detail("IncomingClusterKey", req.key)
.detail("ClusterKey", ccf->getConnectionString().clusterKey())
.detail("ClusterKey", ccr->getConnectionString().clusterKey())
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
req.reply.sendError(wrong_connection_file());
} else {
@ -647,13 +647,13 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
if (forward.present())
req.reply.send(forward.get());
else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
TraceEvent(SevWarn, "CCRMismatch")
.detail("RequestType", "GetLeaderRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("LocalCS", ccr->getConnectionString().toString())
.detail("IncomingClusterKey", req.key)
.detail("ClusterKey", ccf->getConnectionString().clusterKey());
.detail("ClusterKey", ccr->getConnectionString().clusterKey());
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).getLeader.send(req);
@ -665,11 +665,11 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
if (forward.present())
req.reply.send(forward.get());
else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
TraceEvent(SevWarn, "CCRMismatch")
.detail("RequestType", "CandidacyRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("LocalCS", ccr->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
@ -682,11 +682,11 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
if (forward.present())
req.reply.send(LeaderHeartbeatReply{ false });
else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
TraceEvent(SevWarn, "CCRMismatch")
.detail("RequestType", "LeaderHeartbeatRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("LocalCS", ccr->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
@ -699,11 +699,11 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
if (forward.present())
req.reply.send(Void());
else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
TraceEvent(SevWarn, "CCRMismatch")
.detail("RequestType", "ForwardRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("LocalCS", ccr->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
@ -721,7 +721,7 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
}
ACTOR Future<Void> coordinationServer(std::string dataFolder,
Reference<ClusterConnectionFile> ccf,
Reference<IClusterConnectionRecord> ccr,
ConfigDBType configDBType) {
state UID myID = deterministicRandom()->randomUniqueID();
state LeaderElectionRegInterface myLeaderInterface(g_network);
@ -744,7 +744,7 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder,
}
try {
wait(localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID, ccf) ||
wait(localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID, ccr) ||
store.getError() || configDatabaseServer);
throw internal_error();
} catch (Error& e) {

View File

@ -214,7 +214,7 @@ struct ForwardRequest {
class ServerCoordinators : public ClientCoordinators {
public:
explicit ServerCoordinators(Reference<ClusterConnectionFile>);
explicit ServerCoordinators(Reference<IClusterConnectionRecord>);
std::vector<LeaderElectionRegInterface> leaderElectionServers;
std::vector<GenerationRegInterface> stateServers;
@ -222,7 +222,7 @@ public:
};
Future<Void> coordinationServer(std::string const& dataFolder,
Reference<ClusterConnectionFile> const& ccf,
Reference<IClusterConnectionRecord> const& ccf,
ConfigDBType const&);
#endif

View File

@ -922,14 +922,19 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Prefer a healthy team not containing excludeServer.
if (candidates.size() > 0) {
return teams[deterministicRandom()->randomInt(0, candidates.size())]->getServerIDs();
}
// The backup choice is a team with at least one server besides excludeServer, in this
// case, the team will be possibily relocated to a healthy destination later by DD.
if (backup.size() > 0) {
std::vector<UID> res = teams[deterministicRandom()->randomInt(0, backup.size())]->getServerIDs();
std::remove(res.begin(), res.end(), excludeServer);
return teams[candidates[deterministicRandom()->randomInt(0, candidates.size())]]->getServerIDs();
} else if (backup.size() > 0) {
// The backup choice is a team with at least one server besides excludeServer, in this
// case, the team will be possibily relocated to a healthy destination later by DD.
std::vector<UID> servers =
teams[backup[deterministicRandom()->randomInt(0, backup.size())]]->getServerIDs();
std::vector<UID> res;
for (const UID& id : servers) {
if (id != excludeServer) {
res.push_back(id);
}
}
TraceEvent("FoundNonoptimalTeamForDroppedShard", excludeServer).detail("Team", describe(res));
return res;
}
@ -3096,7 +3101,8 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
auto const& keys = self->server_status.getKeys();
for (auto const& key : keys) {
server_status.emplace(key, self->server_status.get(key));
// Add to or update the local server_status map
server_status[key] = self->server_status.get(key);
}
TraceEvent("DDPrintSnapshotTeasmInfo", self->distributorId)
@ -3131,13 +3137,22 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
server = server_info.begin();
for (i = 0; i < server_info.size(); i++) {
const UID& uid = server->first;
TraceEvent("ServerStatus", self->distributorId)
.detail("ServerUID", uid)
.detail("Healthy", !get(server_status, uid).isUnhealthy())
TraceEvent e("ServerStatus", self->distributorId);
e.detail("ServerUID", uid)
.detail("MachineIsValid", server_info[uid]->machine.isValid())
.detail("MachineTeamSize",
server_info[uid]->machine.isValid() ? server_info[uid]->machine->machineTeams.size() : -1)
.detail("Primary", self->primary);
// ServerStatus might not be known if server was very recently added and storageServerFailureTracker()
// has not yet updated self->server_status
// If the UID is not found, do not assume the server is healthy or unhealthy
auto it = server_status.find(uid);
if (it != server_status.end()) {
e.detail("Healthy", !it->second.isUnhealthy());
}
server++;
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
wait(yield());
@ -3174,7 +3189,11 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
// Healthy machine has at least one healthy server
for (auto& server : _machine->serversOnMachine) {
if (!get(server_status, server->id).isUnhealthy()) {
// ServerStatus might not be known if server was very recently added and
// storageServerFailureTracker() has not yet updated self->server_status If the UID is not found, do
// not assume the server is healthy
auto it = server_status.find(server->id);
if (it != server_status.end() && !it->second.isUnhealthy()) {
isMachineHealthy = true;
}
}
@ -6278,7 +6297,18 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
self->teamCollection = nullptr;
primaryTeamCollection = Reference<DDTeamCollection>();
remoteTeamCollection = Reference<DDTeamCollection>();
wait(shards.clearAsync());
if (err.code() == error_code_actor_cancelled) {
// When cancelled, we cannot clear asyncronously because
// this will result in invalid memory access. This should only
// be an issue in simulation.
if (!g_network->isSimulated()) {
TraceEvent(SevWarnAlways, "DataDistributorCancelled");
}
shards.clear();
throw e;
} else {
wait(shards.clearAsync());
}
TraceEvent("DataDistributorTeamCollectionsDestroyed").error(err);
if (removeFailedServer.getFuture().isReady() && !removeFailedServer.getFuture().isError()) {
TraceEvent("RemoveFailedServer", removeFailedServer.getFuture().get()).error(err);

View File

@ -999,8 +999,8 @@ public:
return endLocation();
}
Future<Void> getError() override { return rawQueue->getError(); }
Future<Void> onClosed() override { return rawQueue->onClosed(); }
Future<Void> getError() const override { return rawQueue->getError(); }
Future<Void> onClosed() const override { return rawQueue->onClosed(); }
void dispose() override {
TraceEvent("DQDestroy", dbgid)
@ -1551,8 +1551,8 @@ public:
popped(0), committed(0){};
// IClosable
Future<Void> getError() override { return queue->getError(); }
Future<Void> onClosed() override { return queue->onClosed(); }
Future<Void> getError() const override { return queue->getError(); }
Future<Void> onClosed() const override { return queue->onClosed(); }
void dispose() override {
queue->dispose();
delete this;

View File

@ -30,10 +30,12 @@ public:
// IClosable is a base interface for any disk-backed data structure that needs to support asynchronous errors,
// shutdown and deletion
virtual Future<Void> getError() = 0; // asynchronously throws an error if there is an internal error. Never set
// inside (on the stack of) a call to another API function on this object.
virtual Future<Void> onClosed() = 0; // the future is set to Void when this is totally shut down after dispose() or
// close(). But this function cannot be called after dispose or close!
virtual Future<Void> getError()
const = 0; // asynchronously throws an error if there is an internal error. Never set
// inside (on the stack of) a call to another API function on this object.
virtual Future<Void> onClosed()
const = 0; // the future is set to Void when this is totally shut down after dispose() or
// close(). But this function cannot be called after dispose or close!
virtual void dispose() = 0; // permanently delete the data AND invalidate this interface
virtual void close() = 0; // invalidate this interface, but do not delete the data. Outstanding operations may or
// may not take effect in the background.
@ -47,16 +49,30 @@ public:
virtual Future<Void> commit(
bool sequential = false) = 0; // returns when prior sets and clears are (atomically) durable
virtual Future<Optional<Value>> readValue(KeyRef key, Optional<UID> debugID = Optional<UID>()) = 0;
enum class ReadType {
EAGER,
FETCH,
LOW,
NORMAL,
HIGH,
};
virtual Future<Optional<Value>> readValue(KeyRef key,
ReadType type = ReadType::NORMAL,
Optional<UID> debugID = Optional<UID>()) = 0;
// Like readValue(), but returns only the first maxLength bytes of the value if it is longer
virtual Future<Optional<Value>> readValuePrefix(KeyRef key,
int maxLength,
ReadType type = ReadType::NORMAL,
Optional<UID> debugID = Optional<UID>()) = 0;
// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
// The total size of the returned value (less the last entry) will be less than byteLimit
virtual Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit = 1 << 30, int byteLimit = 1 << 30) = 0;
virtual Future<RangeResult> readRange(KeyRangeRef keys,
int rowLimit = 1 << 30,
int byteLimit = 1 << 30,
ReadType type = ReadType::NORMAL) = 0;
// To debug MEMORY_RADIXTREE type ONLY
// Returns (1) how many key & value pairs have been inserted (2) how many nodes have been created (3) how many

View File

@ -269,8 +269,10 @@ public:
// The snapshot shall be usable until setOldVersion() is called with a version > v.
virtual Reference<IPagerSnapshot> getReadSnapshot(Version v) = 0;
// Atomically make durable all pending page writes, page frees, and update the metadata string.
virtual Future<Void> commit() = 0;
// Atomically make durable all pending page writes, page frees, and update the metadata string,
// setting the committed version to v
// v must be >= the highest versioned page write.
virtual Future<Void> commit(Version v) = 0;
// Get the latest meta key set or committed
virtual Key getMetaKey() const = 0;
@ -278,9 +280,6 @@ public:
// Set the metakey which will be stored in the next commit
virtual void setMetaKey(KeyRef metaKey) = 0;
// Sets the next commit version
virtual void setCommitVersion(Version v) = 0;
virtual StorageBytes getStorageBytes() const = 0;
virtual int64_t getPageCount() = 0;
@ -294,16 +293,16 @@ public:
virtual Future<Void> init() = 0;
// Returns latest committed version
virtual Version getLatestVersion() const = 0;
virtual Version getLastCommittedVersion() const = 0;
// Returns the oldest readable version as of the most recent committed version
virtual Version getOldestVersion() const = 0;
virtual Version getOldestReadableVersion() const = 0;
// Sets the oldest readable version to be put into affect at the next commit.
// The pager can reuse pages that were freed at a version less than v.
// If any snapshots are in use at a version less than v, the pager can either forcefully
// invalidate them or keep their versions around until the snapshots are no longer in use.
virtual void setOldestVersion(Version v) = 0;
virtual void setOldestReadableVersion(Version v) = 0;
protected:
~IPager2() {} // Destruction should be done using close()/dispose() from the IClosable interface

View File

@ -35,8 +35,8 @@ struct KeyValueStoreCompressTestData final : IKeyValueStore {
KeyValueStoreCompressTestData(IKeyValueStore* store) : store(store) {}
Future<Void> getError() override { return store->getError(); }
Future<Void> onClosed() override { return store->onClosed(); }
Future<Void> getError() const override { return store->getError(); }
Future<Void> onClosed() const override { return store->onClosed(); }
void dispose() override {
store->dispose();
@ -56,7 +56,7 @@ struct KeyValueStoreCompressTestData final : IKeyValueStore {
void clear(KeyRangeRef range, const Arena* arena = nullptr) override { store->clear(range, arena); }
Future<Void> commit(bool sequential = false) override { return store->commit(sequential); }
Future<Optional<Value>> readValue(KeyRef key, Optional<UID> debugID = Optional<UID>()) override {
Future<Optional<Value>> readValue(KeyRef key, IKeyValueStore::ReadType, Optional<UID> debugID) override {
return doReadValue(store, key, debugID);
}
@ -66,19 +66,20 @@ struct KeyValueStoreCompressTestData final : IKeyValueStore {
// reason, you will need to fix this.
Future<Optional<Value>> readValuePrefix(KeyRef key,
int maxLength,
Optional<UID> debugID = Optional<UID>()) override {
IKeyValueStore::ReadType,
Optional<UID> debugID) override {
return doReadValuePrefix(store, key, maxLength, debugID);
}
// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
// The total size of the returned value (less the last entry) will be less than byteLimit
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit = 1 << 30, int byteLimit = 1 << 30) override {
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit, int byteLimit, IKeyValueStore::ReadType) override {
return doReadRange(store, keys, rowLimit, byteLimit);
}
private:
ACTOR static Future<Optional<Value>> doReadValue(IKeyValueStore* store, Key key, Optional<UID> debugID) {
Optional<Value> v = wait(store->readValue(key, debugID));
Optional<Value> v = wait(store->readValue(key, IKeyValueStore::ReadType::NORMAL, debugID));
if (!v.present())
return v;
return unpack(v.get());

View File

@ -43,8 +43,8 @@ public:
bool exactRecovery);
// IClosable
Future<Void> getError() override { return log->getError(); }
Future<Void> onClosed() override { return log->onClosed(); }
Future<Void> getError() const override { return log->getError(); }
Future<Void> onClosed() const override { return log->onClosed(); }
void dispose() override {
recovering.cancel();
log->dispose();
@ -194,7 +194,7 @@ public:
return c;
}
Future<Optional<Value>> readValue(KeyRef key, Optional<UID> debugID = Optional<UID>()) override {
Future<Optional<Value>> readValue(KeyRef key, IKeyValueStore::ReadType, Optional<UID> debugID) override {
if (recovering.isError())
throw recovering.getError();
if (!recovering.isReady())
@ -208,7 +208,8 @@ public:
Future<Optional<Value>> readValuePrefix(KeyRef key,
int maxLength,
Optional<UID> debugID = Optional<UID>()) override {
IKeyValueStore::ReadType,
Optional<UID> debugID) override {
if (recovering.isError())
throw recovering.getError();
if (!recovering.isReady())
@ -227,7 +228,7 @@ public:
// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
// The total size of the returned value (less the last entry) will be less than byteLimit
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit = 1 << 30, int byteLimit = 1 << 30) override {
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit, int byteLimit, IKeyValueStore::ReadType) override {
if (recovering.isError())
throw recovering.getError();
if (!recovering.isReady())
@ -826,18 +827,18 @@ private:
ACTOR static Future<Optional<Value>> waitAndReadValue(KeyValueStoreMemory* self, Key key) {
wait(self->recovering);
return self->readValue(key).get();
return static_cast<IKeyValueStore*>(self)->readValue(key).get();
}
ACTOR static Future<Optional<Value>> waitAndReadValuePrefix(KeyValueStoreMemory* self, Key key, int maxLength) {
wait(self->recovering);
return self->readValuePrefix(key, maxLength).get();
return static_cast<IKeyValueStore*>(self)->readValuePrefix(key, maxLength).get();
}
ACTOR static Future<RangeResult> waitAndReadRange(KeyValueStoreMemory* self,
KeyRange keys,
int rowLimit,
int byteLimit) {
wait(self->recovering);
return self->readRange(keys, rowLimit, byteLimit).get();
return static_cast<IKeyValueStore*>(self)->readRange(keys, rowLimit, byteLimit).get();
}
ACTOR static Future<Void> waitAndCommit(KeyValueStoreMemory* self, bool sequential) {
wait(self->recovering);

View File

@ -581,7 +581,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
}
Future<Void> getError() override { return errorPromise.getFuture(); }
Future<Void> getError() const override { return errorPromise.getFuture(); }
ACTOR static void doClose(RocksDBKeyValueStore* self, bool deleteOnClose) {
// The metrics future retains a reference to the DB, so stop it before we delete it.
@ -600,7 +600,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
delete self;
}
Future<Void> onClosed() override { return closePromise.getFuture(); }
Future<Void> onClosed() const override { return closePromise.getFuture(); }
void dispose() override { doClose(this, true); }
@ -645,21 +645,24 @@ struct RocksDBKeyValueStore : IKeyValueStore {
return res;
}
Future<Optional<Value>> readValue(KeyRef key, Optional<UID> debugID) override {
Future<Optional<Value>> readValue(KeyRef key, IKeyValueStore::ReadType, Optional<UID> debugID) override {
auto a = new Reader::ReadValueAction(key, debugID);
auto res = a->result.getFuture();
readThreads->post(a);
return res;
}
Future<Optional<Value>> readValuePrefix(KeyRef key, int maxLength, Optional<UID> debugID) override {
Future<Optional<Value>> readValuePrefix(KeyRef key,
int maxLength,
IKeyValueStore::ReadType,
Optional<UID> debugID) override {
auto a = new Reader::ReadValuePrefixAction(key, maxLength, debugID);
auto res = a->result.getFuture();
readThreads->post(a);
return res;
}
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit, int byteLimit) override {
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit, int byteLimit, IKeyValueStore::ReadType) override {
auto a = new Reader::ReadRangeAction(keys, rowLimit, byteLimit);
auto res = a->result.getFuture();
readThreads->post(a);

View File

@ -1567,8 +1567,8 @@ public:
void dispose() override { doClose(this, true); }
void close() override { doClose(this, false); }
Future<Void> getError() override { return delayed(readThreads->getError() || writeThread->getError()); }
Future<Void> onClosed() override { return stopped.getFuture(); }
Future<Void> getError() const override { return delayed(readThreads->getError() || writeThread->getError()); }
Future<Void> onClosed() const override { return stopped.getFuture(); }
KeyValueStoreType getType() const override { return type; }
StorageBytes getStorageBytes() const override;
@ -1577,9 +1577,12 @@ public:
void clear(KeyRangeRef range, const Arena* arena = nullptr) override;
Future<Void> commit(bool sequential = false) override;
Future<Optional<Value>> readValue(KeyRef key, Optional<UID> debugID) override;
Future<Optional<Value>> readValuePrefix(KeyRef key, int maxLength, Optional<UID> debugID) override;
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit = 1 << 30, int byteLimit = 1 << 30) override;
Future<Optional<Value>> readValue(KeyRef key, IKeyValueStore::ReadType, Optional<UID> debugID) override;
Future<Optional<Value>> readValuePrefix(KeyRef key,
int maxLength,
IKeyValueStore::ReadType,
Optional<UID> debugID) override;
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit, int byteLimit, IKeyValueStore::ReadType) override;
KeyValueStoreSQLite(std::string const& filename,
UID logID,
@ -2192,21 +2195,27 @@ Future<Void> KeyValueStoreSQLite::commit(bool sequential) {
writeThread->post(p);
return f;
}
Future<Optional<Value>> KeyValueStoreSQLite::readValue(KeyRef key, Optional<UID> debugID) {
Future<Optional<Value>> KeyValueStoreSQLite::readValue(KeyRef key, IKeyValueStore::ReadType, Optional<UID> debugID) {
++readsRequested;
auto p = new Reader::ReadValueAction(key, debugID);
auto f = p->result.getFuture();
readThreads->post(p);
return f;
}
Future<Optional<Value>> KeyValueStoreSQLite::readValuePrefix(KeyRef key, int maxLength, Optional<UID> debugID) {
Future<Optional<Value>> KeyValueStoreSQLite::readValuePrefix(KeyRef key,
int maxLength,
IKeyValueStore::ReadType,
Optional<UID> debugID) {
++readsRequested;
auto p = new Reader::ReadValuePrefixAction(key, maxLength, debugID);
auto f = p->result.getFuture();
readThreads->post(p);
return f;
}
Future<RangeResult> KeyValueStoreSQLite::readRange(KeyRangeRef keys, int rowLimit, int byteLimit) {
Future<RangeResult> KeyValueStoreSQLite::readRange(KeyRangeRef keys,
int rowLimit,
int byteLimit,
IKeyValueStore::ReadType) {
++readsRequested;
auto p = new Reader::ReadRangeAction(keys, rowLimit, byteLimit);
auto f = p->result.getFuture();

View File

@ -137,21 +137,21 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators,
if (!hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("Filename", coordinators.ccf->getFilename())
.detail("ConnectionStringFromFile", coordinators.ccf->getConnectionString().toString())
.detail("ClusterFile", coordinators.ccr->toString())
.detail("StoredConnectionString", coordinators.ccr->getConnectionString().toString())
.detail("CurrentConnectionString", leader.get().first.serializedInfo.toString());
}
coordinators.ccf->setConnectionString(
coordinators.ccr->setConnectionString(
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
TraceEvent("LeaderForwarding")
.detail("ConnStr", coordinators.ccf->getConnectionString().toString())
.detail("ConnStr", coordinators.ccr->getConnectionString().toString())
.trackLatest("LeaderForwarding");
throw coordinators_changed();
}
if (leader.present() && leader.get().second) {
hasConnected = true;
coordinators.ccf->notifyConnected();
coordinators.ccr->notifyConnected();
}
if (leader.present() && leader.get().second && leader.get().first.equalInternalId(myInfo)) {

View File

@ -218,11 +218,11 @@ Future<Void> LogSystemDiskQueueAdapter::commit() {
return cm.acknowledge.getFuture();
}
Future<Void> LogSystemDiskQueueAdapter::getError() {
Future<Void> LogSystemDiskQueueAdapter::getError() const {
return Void();
}
Future<Void> LogSystemDiskQueueAdapter::onClosed() {
Future<Void> LogSystemDiskQueueAdapter::onClosed() const {
return Void();
}

View File

@ -88,8 +88,8 @@ public:
Future<CommitMessage> getCommitMessage();
// IClosable interface
Future<Void> getError() override;
Future<Void> onClosed() override;
Future<Void> getError() const override;
Future<Void> onClosed() const override;
void dispose() override;
void close() override;

View File

@ -157,8 +157,8 @@ public:
Future<Void> commit() { return queue->commit(); }
// Implements IClosable
Future<Void> getError() override { return queue->getError(); }
Future<Void> onClosed() override { return queue->onClosed(); }
Future<Void> getError() const override { return queue->getError(); }
Future<Void> onClosed() const override { return queue->onClosed(); }
void dispose() override {
queue->dispose();
delete this;

View File

@ -123,8 +123,8 @@ public:
Future<Void> commit() { return queue->commit(); }
// Implements IClosable
Future<Void> getError() override { return queue->getError(); }
Future<Void> onClosed() override { return queue->onClosed(); }
Future<Void> getError() const override { return queue->getError(); }
Future<Void> onClosed() const override { return queue->onClosed(); }
void dispose() override {
queue->dispose();
delete this;

View File

@ -127,8 +127,8 @@ public:
Future<Void> commit() { return queue->commit(); }
// Implements IClosable
Future<Void> getError() override { return queue->getError(); }
Future<Void> onClosed() override { return queue->onClosed(); }
Future<Void> getError() const override { return queue->getError(); }
Future<Void> onClosed() const override { return queue->onClosed(); }
void dispose() override {
queue->dispose();
delete this;

View File

@ -56,11 +56,11 @@ IKeyValueStore* OnDemandStore::operator->() {
return get();
}
Future<Void> OnDemandStore::getError() {
Future<Void> OnDemandStore::getError() const {
return onErr(err.getFuture());
}
Future<Void> OnDemandStore::onClosed() {
Future<Void> OnDemandStore::onClosed() const {
return store->onClosed();
}

View File

@ -41,8 +41,8 @@ public:
bool exists() const;
IKeyValueStore* operator->();
Future<Void> getError() override;
Future<Void> onClosed() override;
Future<Void> getError() const override;
Future<Void> onClosed() const override;
void dispose() override;
void close() override;
};

View File

@ -638,6 +638,11 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
if (g_network->isSimulated())
wait(delay(5.0));
TraceEvent("QuietDatabaseWaitingOnFullRecovery").log();
while (dbInfo->get().recoveryState != RecoveryState::FULLY_RECOVERED) {
wait(dbInfo->onChange());
}
// The quiet database check (which runs at the end of every test) will always time out due to active data movement.
// To get around this, quiet Database will disable the perpetual wiggle in the setup phase.

View File

@ -856,18 +856,20 @@ ACTOR static Future<Void> insertRangeVersion(KeyRangeMap<Version>* pRangeVersion
r->value() = std::max(r->value(), file->version);
}
// Dump the new key ranges
ranges = pRangeVersions->ranges();
int i = 0;
for (auto r = ranges.begin(); r != ranges.end(); ++r) {
TraceEvent(SevDebug, "RangeVersionsAfterUpdate")
.detail("File", file->toString())
.detail("FileRange", fileRange.toString())
.detail("FileVersion", file->version)
.detail("RangeIndex", i++)
.detail("RangeBegin", r->begin())
.detail("RangeEnd", r->end())
.detail("RangeVersion", r->value());
if (SERVER_KNOBS->FASTRESTORE_DUMP_INSERT_RANGE_VERSION) {
// Dump the new key ranges for debugging purpose.
ranges = pRangeVersions->ranges();
int i = 0;
for (auto r = ranges.begin(); r != ranges.end(); ++r) {
TraceEvent(SevDebug, "RangeVersionsAfterUpdate")
.detail("File", file->toString())
.detail("FileRange", fileRange.toString())
.detail("FileVersion", file->version)
.detail("RangeIndex", i++)
.detail("RangeBegin", r->begin())
.detail("RangeEnd", r->end())
.detail("RangeVersion", r->value());
}
}
return Void();

View File

@ -406,11 +406,11 @@ ACTOR Future<Void> _restoreWorker(Database cx, LocalityData locality) {
return Void();
}
ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> restoreWorker(Reference<IClusterConnectionRecord> connRecord,
LocalityData locality,
std::string coordFolder) {
try {
Database cx = Database::createDatabase(connFile, Database::API_VERSION_LATEST, IsInternal::True, locality);
Database cx = Database::createDatabase(connRecord, Database::API_VERSION_LATEST, IsInternal::True, locality);
wait(reportErrors(_restoreWorker(cx, locality), "RestoreWorker"));
} catch (Error& e) {
TraceEvent("FastRestoreWorker").detail("Error", e.what());

View File

@ -711,7 +711,9 @@ std::string getRoleStr(RestoreRole role);
////--- Interface functions
ACTOR Future<Void> _restoreWorker(Database cx, LocalityData locality);
ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> ccf, LocalityData locality, std::string coordFolder);
ACTOR Future<Void> restoreWorker(Reference<IClusterConnectionRecord> ccr,
LocalityData locality,
std::string coordFolder);
extern const KeyRef restoreLeaderKey;
extern const KeyRangeRef restoreWorkersKeys;

View File

@ -26,6 +26,8 @@
#include <toml.hpp>
#include "fdbrpc/Locality.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/WorkerInterface.actor.h"
@ -387,7 +389,7 @@ T simulate(const T& in) {
return out;
}
ACTOR Future<Void> runBackup(Reference<ClusterConnectionFile> connFile) {
ACTOR Future<Void> runBackup(Reference<IClusterConnectionRecord> connRecord) {
state std::vector<Future<Void>> agentFutures;
while (g_simulator.backupAgents == ISimulator::BackupAgentType::WaitForType) {
@ -395,7 +397,7 @@ ACTOR Future<Void> runBackup(Reference<ClusterConnectionFile> connFile) {
}
if (g_simulator.backupAgents == ISimulator::BackupAgentType::BackupToFile) {
Database cx = Database::createDatabase(connFile, -1);
Database cx = Database::createDatabase(connRecord, -1);
state FileBackupAgent fileAgent;
agentFutures.push_back(fileAgent.run(
@ -414,7 +416,7 @@ ACTOR Future<Void> runBackup(Reference<ClusterConnectionFile> connFile) {
throw internal_error();
}
ACTOR Future<Void> runDr(Reference<ClusterConnectionFile> connFile) {
ACTOR Future<Void> runDr(Reference<IClusterConnectionRecord> connRecord) {
state std::vector<Future<Void>> agentFutures;
while (g_simulator.drAgents == ISimulator::BackupAgentType::WaitForType) {
@ -422,13 +424,13 @@ ACTOR Future<Void> runDr(Reference<ClusterConnectionFile> connFile) {
}
if (g_simulator.drAgents == ISimulator::BackupAgentType::BackupToDB) {
Database cx = Database::createDatabase(connFile, -1);
Database cx = Database::createDatabase(connRecord, -1);
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
state Database extraDB = Database::createDatabase(extraFile, -1);
TraceEvent("StartingDrAgents")
.detail("ConnFile", connFile->getConnectionString().toString())
.detail("ConnectionString", connRecord->getConnectionString().toString())
.detail("ExtraString", extraFile->getConnectionString().toString());
state DatabaseBackupAgent dbAgent = DatabaseBackupAgent(cx);
@ -459,7 +461,7 @@ enum AgentMode { AgentNone = 0, AgentOnly = 1, AgentAddition = 2 };
// SOMEDAY: when a process can be rebooted in isolation from the other on that machine,
// a loop{} will be needed around the waiting on simulatedFDBD(). For now this simply
// takes care of house-keeping such as context switching and file closing.
ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnectionFile> connFile,
ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConnectionRecord> connRecord,
IPAddress ip,
bool sslEnabled,
uint16_t port,
@ -525,7 +527,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detail("DataFolder", *dataFolder)
.detail("ConnectionString", connFile ? connFile->getConnectionString().toString() : "")
.detail("ConnectionString", connRecord ? connRecord->getConnectionString().toString() : "")
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("CommandLine", "fdbserver -r simulation")
.detail("BuggifyEnabled", isBuggifyEnabled(BuggifyType::General))
@ -546,7 +548,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
futures.push_back(FlowTransport::transport().bind(n, n));
}
if (runBackupAgents != AgentOnly) {
futures.push_back(fdbd(connFile,
futures.push_back(fdbd(connRecord,
localities,
processClass,
*dataFolder,
@ -561,8 +563,8 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
configDBType));
}
if (runBackupAgents != AgentNone) {
futures.push_back(runBackup(connFile));
futures.push_back(runDr(connFile));
futures.push_back(runBackup(connRecord));
futures.push_back(runDr(connRecord));
}
futures.push_back(success(onShutdown));
@ -666,9 +668,9 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
if (!useSeedFile) {
writeFile(joinPath(*dataFolder, "fdb.cluster"), connStr.toString());
connFile = makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"));
connRecord = makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"));
} else {
connFile =
connRecord =
makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"), connStr.toString());
}
} else {
@ -747,9 +749,9 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
state std::vector<Future<ISimulator::KillType>> processes;
for (int i = 0; i < ips.size(); i++) {
std::string path = joinPath(myFolders[i], "fdb.cluster");
Reference<ClusterConnectionFile> clusterFile(useSeedFile
? new ClusterConnectionFile(path, connStr.toString())
: new ClusterConnectionFile(path));
Reference<IClusterConnectionRecord> clusterFile(
useSeedFile ? new ClusterConnectionFile(path, connStr.toString())
: new ClusterConnectionFile(path));
const int listenPort = i * listenPerProcess + 1;
AgentMode agentMode =
runBackupAgents == AgentOnly ? (i == ips.size() - 1 ? AgentOnly : AgentNone) : runBackupAgents;
@ -2196,7 +2198,7 @@ ACTOR void setupAndRun(std::string dataFolder,
bool restoring,
std::string whitelistBinPaths) {
state std::vector<Future<Void>> systemActors;
state Optional<ClusterConnectionString> connFile;
state Optional<ClusterConnectionString> connectionString;
state Standalone<StringRef> startingConfiguration;
state int testerCount = 1;
state TestConfig testConfig;
@ -2258,7 +2260,7 @@ ACTOR void setupAndRun(std::string dataFolder,
wait(timeoutError(restartSimulatedSystem(&systemActors,
dataFolder,
&testerCount,
&connFile,
&connectionString,
&startingConfiguration,
testConfig,
whitelistBinPaths,
@ -2273,7 +2275,7 @@ ACTOR void setupAndRun(std::string dataFolder,
setupSimulatedSystem(&systemActors,
dataFolder,
&testerCount,
&connFile,
&connectionString,
&startingConfiguration,
whitelistBinPaths,
testConfig,
@ -2282,7 +2284,7 @@ ACTOR void setupAndRun(std::string dataFolder,
}
std::string clusterFileDir = joinPath(dataFolder, deterministicRandom()->randomUniqueID().toString());
platform::createDirectory(clusterFileDir);
writeFile(joinPath(clusterFileDir, "fdb.cluster"), connFile.get().toString());
writeFile(joinPath(clusterFileDir, "fdb.cluster"), connectionString.get().toString());
wait(timeoutError(runTests(makeReference<ClusterConnectionFile>(joinPath(clusterFileDir, "fdb.cluster")),
TEST_TYPE_FROM_FILE,
TEST_ON_TESTERS,

View File

@ -813,7 +813,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
}
}
for (auto& coordinator : coordinators.ccf->getConnectionString().coordinators()) {
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
roles.addCoordinatorRole(coordinator);
}
@ -2423,7 +2423,7 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
workerZones[worker.interf.address()] = worker.interf.locality.zoneId().orDefault(LiteralStringRef(""));
}
std::map<StringRef, int> coordinatorZoneCounts;
for (auto& coordinator : coordinators.ccf->getConnectionString().coordinators()) {
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
auto zone = workerZones[coordinator];
coordinatorZoneCounts[zone] += 1;
}
@ -2806,7 +2806,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
state JsonBuilderObject data_overlay;
statusObj["protocol_version"] = format("%" PRIx64, g_network->protocolVersion().version());
statusObj["connection_string"] = coordinators.ccf->getConnectionString().toString();
statusObj["connection_string"] = coordinators.ccr->getConnectionString().toString();
statusObj["bounce_impact"] = getBounceImpactInfo(statusCode);
state Optional<DatabaseConfiguration> configuration;

View File

@ -128,8 +128,8 @@ public:
Future<Void> commit() { return queue->commit(); }
// Implements IClosable
Future<Void> getError() override { return queue->getError(); }
Future<Void> onClosed() override { return queue->onClosed(); }
Future<Void> getError() const override { return queue->getError(); }
Future<Void> onClosed() const override { return queue->onClosed(); }
void dispose() override {
queue->dispose();
delete this;

View File

@ -113,14 +113,14 @@ struct TesterInterface {
};
ACTOR Future<Void> testerServerCore(TesterInterface interf,
Reference<ClusterConnectionFile> ccf,
Reference<IClusterConnectionRecord> ccr,
Reference<AsyncVar<struct ServerDBInfo> const> serverDBInfo,
LocalityData locality);
enum test_location_t { TEST_HERE, TEST_ON_SERVERS, TEST_ON_TESTERS };
enum test_type_t { TEST_TYPE_FROM_FILE, TEST_TYPE_CONSISTENCY_CHECK, TEST_TYPE_UNIT_TESTS };
ACTOR Future<Void> runTests(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> runTests(Reference<IClusterConnectionRecord> connRecord,
test_type_t whatToRun,
test_location_t whereToRun,
int minTestersExpected,

File diff suppressed because it is too large Load Diff

View File

@ -884,7 +884,7 @@ ACTOR Future<Void> extractClusterInterface(
Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>> const> in,
Reference<AsyncVar<Optional<struct ClusterInterface>>> out);
ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> ccf,
ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> ccr,
LocalityData localities,
ProcessClass processClass,
std::string dataFolder,
@ -898,7 +898,7 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> ccf,
std::map<std::string, std::string> manualKnobOverrides,
ConfigDBType configDBType);
ACTOR Future<Void> clusterController(Reference<ClusterConnectionFile> ccf,
ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> ccr,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
Future<Void> recoveredDiskFiles,
@ -922,8 +922,8 @@ ACTOR Future<Void> storageServer(
Reference<AsyncVar<ServerDBInfo> const> db,
std::string folder,
Promise<Void> recovered,
Reference<ClusterConnectionFile>
connFile); // changes pssi->id() to be the recovered ID); // changes pssi->id() to be the recovered ID
Reference<IClusterConnectionRecord>
connRecord); // changes pssi->id() to be the recovered ID); // changes pssi->id() to be the recovered ID
ACTOR Future<Void> masterServer(MasterInterface mi,
Reference<AsyncVar<ServerDBInfo> const> db,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,

View File

@ -36,6 +36,7 @@
#include <boost/interprocess/managed_shared_memory.hpp>
#include "fdbclient/ActorLineageProfiler.h"
#include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/IKnobCollection.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/SystemData.h"
@ -805,7 +806,7 @@ Optional<bool> checkBuggifyOverride(const char* testFile) {
// Takes a vector of public and listen address strings given via command line, and returns vector of NetworkAddress
// objects.
std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
const ClusterConnectionFile& connectionFile,
const IClusterConnectionRecord& connectionRecord,
const std::vector<std::string>& publicAddressStrs,
std::vector<std::string>& listenAddressStrs) {
if (listenAddressStrs.size() > 0 && publicAddressStrs.size() != listenAddressStrs.size()) {
@ -823,7 +824,7 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
NetworkAddressList publicNetworkAddresses;
NetworkAddressList listenNetworkAddresses;
auto& coordinators = connectionFile.getConnectionString().coordinators();
auto& coordinators = connectionRecord.getConnectionString().coordinators();
ASSERT(coordinators.size() > 0);
for (int ii = 0; ii < publicAddressStrs.size(); ++ii) {
@ -833,7 +834,7 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
if (autoPublicAddress) {
try {
const NetworkAddress& parsedAddress = NetworkAddress::parse("0.0.0.0:" + publicAddressStr.substr(5));
const IPAddress publicIP = determinePublicIPAutomatically(connectionFile.getConnectionString());
const IPAddress publicIP = determinePublicIPAutomatically(connectionRecord.getConnectionString());
currentPublicAddress = NetworkAddress(publicIP, parsedAddress.port, true, parsedAddress.isTLS());
} catch (Error& e) {
fprintf(stderr,
@ -998,7 +999,7 @@ struct CLIOptions {
std::string configPath;
ConfigDBType configDBType{ ConfigDBType::DISABLED };
Reference<ClusterConnectionFile> connectionFile;
Reference<IClusterConnectionRecord> connectionFile;
Standalone<StringRef> machineId;
UnitTestParameters testParams;
@ -1849,7 +1850,7 @@ int main(int argc, char* argv[]) {
.detail("FileSystem", opts.fileSystemPath)
.detail("DataFolder", opts.dataFolder)
.detail("WorkingDirectory", cwd)
.detail("ClusterFile", opts.connectionFile ? opts.connectionFile->getFilename().c_str() : "")
.detail("ClusterFile", opts.connectionFile ? opts.connectionFile->toString() : "")
.detail("ConnectionString",
opts.connectionFile ? opts.connectionFile->getConnectionString().toString() : "")
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))

View File

@ -649,7 +649,8 @@ ACTOR Future<Standalone<CommitTransactionRef>> provisionalMaster(Reference<Maste
loop choose {
when(GetReadVersionRequest req =
waitNext(parent->provisionalGrvProxies[0].getConsistentReadVersion.getFuture())) {
if (req.flags & GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY && parent->lastEpochEnd) {
if ((req.flags & GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY) &&
(req.flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES) && parent->lastEpochEnd) {
GetReadVersionReply rep;
rep.version = parent->lastEpochEnd;
rep.locked = locked;
@ -1866,7 +1867,7 @@ ACTOR Future<Void> masterCore(Reference<MasterData> self) {
tr.set(
recoveryCommitRequest.arena, primaryLocalityKey, BinaryWriter::toValue(self->primaryLocality, Unversioned()));
tr.set(recoveryCommitRequest.arena, backupVersionKey, backupVersionValue);
tr.set(recoveryCommitRequest.arena, coordinatorsKey, self->coordinators.ccf->getConnectionString().toString());
tr.set(recoveryCommitRequest.arena, coordinatorsKey, self->coordinators.ccr->getConnectionString().toString());
tr.set(recoveryCommitRequest.arena, logsKey, self->logSystem->getLogsValue());
tr.set(recoveryCommitRequest.arena,
primaryDatacenterKey,

View File

@ -195,15 +195,25 @@ struct StorageServerDisk {
Future<Void> commit() { return storage->commit(); }
// SOMEDAY: Put readNextKeyInclusive in IKeyValueStore
Future<Key> readNextKeyInclusive(KeyRef key) { return readFirstKey(storage, KeyRangeRef(key, allKeys.end)); }
Future<Optional<Value>> readValue(KeyRef key, Optional<UID> debugID = Optional<UID>()) {
return storage->readValue(key, debugID);
Future<Key> readNextKeyInclusive(KeyRef key, IKeyValueStore::ReadType type = IKeyValueStore::ReadType::NORMAL) {
return readFirstKey(storage, KeyRangeRef(key, allKeys.end), type);
}
Future<Optional<Value>> readValuePrefix(KeyRef key, int maxLength, Optional<UID> debugID = Optional<UID>()) {
return storage->readValuePrefix(key, maxLength, debugID);
Future<Optional<Value>> readValue(KeyRef key,
IKeyValueStore::ReadType type = IKeyValueStore::ReadType::NORMAL,
Optional<UID> debugID = Optional<UID>()) {
return storage->readValue(key, type, debugID);
}
Future<RangeResult> readRange(KeyRangeRef keys, int rowLimit = 1 << 30, int byteLimit = 1 << 30) {
return storage->readRange(keys, rowLimit, byteLimit);
Future<Optional<Value>> readValuePrefix(KeyRef key,
int maxLength,
IKeyValueStore::ReadType type = IKeyValueStore::ReadType::NORMAL,
Optional<UID> debugID = Optional<UID>()) {
return storage->readValuePrefix(key, maxLength, type, debugID);
}
Future<RangeResult> readRange(KeyRangeRef keys,
int rowLimit = 1 << 30,
int byteLimit = 1 << 30,
IKeyValueStore::ReadType type = IKeyValueStore::ReadType::NORMAL) {
return storage->readRange(keys, rowLimit, byteLimit, type);
}
KeyValueStoreType getKeyValueStoreType() const { return storage->getType(); }
@ -216,8 +226,8 @@ private:
void writeMutations(const VectorRef<MutationRef>& mutations, Version debugVersion, const char* debugContext);
ACTOR static Future<Key> readFirstKey(IKeyValueStore* storage, KeyRangeRef range) {
RangeResult r = wait(storage->readRange(range, 1));
ACTOR static Future<Key> readFirstKey(IKeyValueStore* storage, KeyRangeRef range, IKeyValueStore::ReadType type) {
RangeResult r = wait(storage->readRange(range, 1, 1 << 30, type));
if (r.size())
return r[0].key;
else
@ -660,6 +670,9 @@ public:
bool debug_inApplyUpdate;
double debug_lastValidateTime;
int64_t lastBytesInputEBrake;
Version lastDurableVersionEBrake;
int maxQueryQueue;
int getAndResetMaxQueryQueueSize() {
int val = maxQueryQueue;
@ -859,7 +872,7 @@ public:
fetchKeysBytesBudget(SERVER_KNOBS->STORAGE_FETCH_BYTES), fetchKeysBudgetUsed(false),
instanceID(deterministicRandom()->randomUniqueID().first()), shuttingDown(false), behind(false),
versionBehind(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), maxQueryQueue(0),
transactionTagCounter(ssi.id()), counters(this),
lastBytesInputEBrake(0), lastDurableVersionEBrake(0), transactionTagCounter(ssi.id()), counters(this),
storageServerSourceTLogIDEventHolder(
makeReference<EventCacheHolder>(ssi.id().toString() + "/StorageServerSourceTLogID")) {
version.initMetric(LiteralStringRef("StorageServer.Version"), counters.cc.id);
@ -1282,7 +1295,7 @@ ACTOR Future<Void> getValueQ(StorageServer* data, GetValueRequest req) {
path = 1;
} else if (!i || !i->isClearTo() || i->getEndKey() <= req.key) {
path = 2;
Optional<Value> vv = wait(data->storage.readValue(req.key, req.debugID));
Optional<Value> vv = wait(data->storage.readValue(req.key, IKeyValueStore::ReadType::NORMAL, req.debugID));
// Validate that while we were reading the data we didn't lose the version or shard
if (version < data->storageVersion()) {
TEST(true); // transaction_too_old after readValue
@ -1631,7 +1644,8 @@ ACTOR Future<GetKeyValuesReply> readRange(StorageServer* data,
KeyRange range,
int limit,
int* pLimitBytes,
SpanID parentSpan) {
SpanID parentSpan,
IKeyValueStore::ReadType type) {
state GetKeyValuesReply result;
state StorageServer::VersionedData::ViewAtVersion view = data->data().at(version);
state StorageServer::VersionedData::iterator vCurrent = view.end();
@ -1695,7 +1709,7 @@ ACTOR Future<GetKeyValuesReply> readRange(StorageServer* data,
// Read the data on disk up to vCurrent (or the end of the range)
readEnd = vCurrent ? std::min(vCurrent.key(), range.end) : range.end;
RangeResult atStorageVersion =
wait(data->storage.readRange(KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes));
wait(data->storage.readRange(KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes, type));
ASSERT(atStorageVersion.size() <= limit);
if (data->storageVersion() > version)
@ -1776,7 +1790,7 @@ ACTOR Future<GetKeyValuesReply> readRange(StorageServer* data,
readBegin = vCurrent ? std::max(vCurrent->isClearTo() ? vCurrent->getEndKey() : vCurrent.key(), range.begin)
: range.begin;
RangeResult atStorageVersion =
wait(data->storage.readRange(KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes));
wait(data->storage.readRange(KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes, type));
ASSERT(atStorageVersion.size() <= -limit);
if (data->storageVersion() > version)
@ -1833,7 +1847,8 @@ ACTOR Future<Key> findKey(StorageServer* data,
Version version,
KeyRange range,
int* pOffset,
SpanID parentSpan)
SpanID parentSpan,
IKeyValueStore::ReadType type)
// Attempts to find the key indicated by sel in the data at version, within range.
// Precondition: selectorInRange(sel, range)
// If it is found, offset is set to 0 and a key is returned which falls inside range.
@ -1871,7 +1886,8 @@ ACTOR Future<Key> findKey(StorageServer* data,
forward ? KeyRangeRef(sel.getKey(), range.end) : KeyRangeRef(range.begin, keyAfter(sel.getKey())),
(distance + skipEqualKey) * sign,
&maxBytes,
span.context));
span.context,
type));
state bool more = rep.more && rep.data.size() != distance + skipEqualKey;
// If we get only one result in the reverse direction as a result of the data being too large, we could get stuck in
@ -1879,8 +1895,8 @@ ACTOR Future<Key> findKey(StorageServer* data,
if (more && !forward && rep.data.size() == 1) {
TEST(true); // Reverse key selector returned only one result in range read
maxBytes = std::numeric_limits<int>::max();
GetKeyValuesReply rep2 = wait(
readRange(data, version, KeyRangeRef(range.begin, keyAfter(sel.getKey())), -2, &maxBytes, span.context));
GetKeyValuesReply rep2 = wait(readRange(
data, version, KeyRangeRef(range.begin, keyAfter(sel.getKey())), -2, &maxBytes, span.context, type));
rep = rep2;
more = rep.more && rep.data.size() != distance + skipEqualKey;
ASSERT(rep.data.size() == 2 || !more);
@ -1945,6 +1961,8 @@ ACTOR Future<Void> getKeyValuesQ(StorageServer* data, GetKeyValuesRequest req)
{
state Span span("SS:getKeyValues"_loc, { req.spanContext });
state int64_t resultSize = 0;
state IKeyValueStore::ReadType type =
req.isFetchKeys ? IKeyValueStore::ReadType::FETCH : IKeyValueStore::ReadType::NORMAL;
getCurrentLineage()->modify(&TransactionLineage::txID) = req.spanContext.first();
++data->counters.getRangeQueries;
@ -1989,10 +2007,10 @@ ACTOR Future<Void> getKeyValuesQ(StorageServer* data, GetKeyValuesRequest req)
state int offset2;
state Future<Key> fBegin = req.begin.isFirstGreaterOrEqual()
? Future<Key>(req.begin.getKey())
: findKey(data, req.begin, version, shard, &offset1, span.context);
: findKey(data, req.begin, version, shard, &offset1, span.context, type);
state Future<Key> fEnd = req.end.isFirstGreaterOrEqual()
? Future<Key>(req.end.getKey())
: findKey(data, req.end, version, shard, &offset2, span.context);
: findKey(data, req.end, version, shard, &offset2, span.context, type);
state Key begin = wait(fBegin);
state Key end = wait(fEnd);
@ -2032,8 +2050,8 @@ ACTOR Future<Void> getKeyValuesQ(StorageServer* data, GetKeyValuesRequest req)
} else {
state int remainingLimitBytes = req.limitBytes;
GetKeyValuesReply _r =
wait(readRange(data, version, KeyRangeRef(begin, end), req.limit, &remainingLimitBytes, span.context));
GetKeyValuesReply _r = wait(
readRange(data, version, KeyRangeRef(begin, end), req.limit, &remainingLimitBytes, span.context, type));
GetKeyValuesReply r = _r;
if (req.debugID.present())
@ -2110,6 +2128,8 @@ ACTOR Future<Void> getKeyValuesStreamQ(StorageServer* data, GetKeyValuesStreamRe
{
state Span span("SS:getKeyValuesStream"_loc, { req.spanContext });
state int64_t resultSize = 0;
state IKeyValueStore::ReadType type =
req.isFetchKeys ? IKeyValueStore::ReadType::FETCH : IKeyValueStore::ReadType::NORMAL;
req.reply.setByteLimit(SERVER_KNOBS->RANGESTREAM_LIMIT_BYTES);
++data->counters.getRangeStreamQueries;
@ -2155,10 +2175,10 @@ ACTOR Future<Void> getKeyValuesStreamQ(StorageServer* data, GetKeyValuesStreamRe
state int offset2;
state Future<Key> fBegin = req.begin.isFirstGreaterOrEqual()
? Future<Key>(req.begin.getKey())
: findKey(data, req.begin, version, shard, &offset1, span.context);
: findKey(data, req.begin, version, shard, &offset1, span.context, type);
state Future<Key> fEnd = req.end.isFirstGreaterOrEqual()
? Future<Key>(req.end.getKey())
: findKey(data, req.end, version, shard, &offset2, span.context);
: findKey(data, req.end, version, shard, &offset2, span.context, type);
state Key begin = wait(fBegin);
state Key end = wait(fEnd);
if (req.debugID.present())
@ -2207,7 +2227,7 @@ ACTOR Future<Void> getKeyValuesStreamQ(StorageServer* data, GetKeyValuesStreamRe
? 1
: CLIENT_KNOBS->REPLY_BYTE_LIMIT;
GetKeyValuesReply _r =
wait(readRange(data, version, KeyRangeRef(begin, end), req.limit, &byteLimit, span.context));
wait(readRange(data, version, KeyRangeRef(begin, end), req.limit, &byteLimit, span.context, type));
GetKeyValuesStreamReply r(_r);
if (req.debugID.present())
@ -2308,7 +2328,8 @@ ACTOR Future<Void> getKeyQ(StorageServer* data, GetKeyRequest req) {
state KeyRange shard = getShardKeyRange(data, req.sel);
state int offset;
Key k = wait(findKey(data, req.sel, version, shard, &offset, req.spanContext));
Key k =
wait(findKey(data, req.sel, version, shard, &offset, req.spanContext, IKeyValueStore::ReadType::NORMAL));
data->checkChangeCounter(
changeCounter, KeyRangeRef(std::min<KeyRef>(req.sel.getKey(), k), std::max<KeyRef>(req.sel.getKey(), k)));
@ -2406,7 +2427,7 @@ ACTOR Future<Void> doEagerReads(StorageServer* data, UpdateEagerReadInfo* eager)
if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
std::vector<Future<Key>> keyEnd(eager->keyBegin.size());
for (int i = 0; i < keyEnd.size(); i++)
keyEnd[i] = data->storage.readNextKeyInclusive(eager->keyBegin[i]);
keyEnd[i] = data->storage.readNextKeyInclusive(eager->keyBegin[i], IKeyValueStore::ReadType::EAGER);
state Future<std::vector<Key>> futureKeyEnds = getAll(keyEnd);
state std::vector<Key> keyEndVal = wait(futureKeyEnds);
@ -2415,7 +2436,8 @@ ACTOR Future<Void> doEagerReads(StorageServer* data, UpdateEagerReadInfo* eager)
std::vector<Future<Optional<Value>>> value(eager->keys.size());
for (int i = 0; i < value.size(); i++)
value[i] = data->storage.readValuePrefix(eager->keys[i].first, eager->keys[i].second);
value[i] =
data->storage.readValuePrefix(eager->keys[i].first, eager->keys[i].second, IKeyValueStore::ReadType::EAGER);
state Future<std::vector<Optional<Value>>> futureValues = getAll(value);
std::vector<Optional<Value>> optionalValues = wait(futureValues);
@ -3244,7 +3266,7 @@ void ShardInfo::addMutation(Version version, MutationRef const& mutation) {
}
}
enum ChangeServerKeysContext { CSK_UPDATE, CSK_RESTORE };
enum ChangeServerKeysContext { CSK_UPDATE, CSK_RESTORE, CSK_ASSIGN_EMPTY };
const char* changeServerKeysContextName[] = { "Update", "Restore" };
void changeServerKeys(StorageServer* data,
@ -3312,6 +3334,7 @@ void changeServerKeys(StorageServer* data,
auto vr = data->newestAvailableVersion.intersectingRanges(keys);
std::vector<std::pair<KeyRange, Version>> changeNewestAvailable;
std::vector<KeyRange> removeRanges;
std::vector<KeyRange> newEmptyRanges;
for (auto r = vr.begin(); r != vr.end(); ++r) {
KeyRangeRef range = keys & r->range();
bool dataAvailable = r->value() == latestVersion || r->value() >= version;
@ -3322,7 +3345,14 @@ void changeServerKeys(StorageServer* data,
// .detail("NowAssigned", nowAssigned)
// .detail("NewestAvailable", r->value())
// .detail("ShardState0", data->shards[range.begin]->debugDescribeState());
if (!nowAssigned) {
if (context == CSK_ASSIGN_EMPTY && !dataAvailable) {
ASSERT(nowAssigned);
TraceEvent("ChangeServerKeysAddEmptyRange", data->thisServerID)
.detail("Begin", range.begin)
.detail("End", range.end);
newEmptyRanges.push_back(range);
data->addShard(ShardInfo::newReadWrite(range, data));
} else if (!nowAssigned) {
if (dataAvailable) {
ASSERT(r->value() ==
latestVersion); // Not that we care, but this used to be checked instead of dataAvailable
@ -3335,7 +3365,7 @@ void changeServerKeys(StorageServer* data,
} else if (!dataAvailable) {
// SOMEDAY: Avoid restarting adding/transferred shards
if (version == 0) { // bypass fetchkeys; shard is known empty at version 0
TraceEvent("ChangeServerKeysAddEmptyRange", data->thisServerID)
TraceEvent("ChangeServerKeysInitialRange", data->thisServerID)
.detail("Begin", range.begin)
.detail("End", range.end);
changeNewestAvailable.emplace_back(range, latestVersion);
@ -3369,6 +3399,14 @@ void changeServerKeys(StorageServer* data,
removeDataRange(data, data->addVersionToMutationLog(data->data().getLatestVersion()), data->shards, *r);
setAvailableStatus(data, *r, false);
}
// Clear the moving-in empty range, and set it available at the latestVersion.
for (const auto& range : newEmptyRanges) {
MutationRef clearRange(MutationRef::ClearRange, range.begin, range.end);
data->addMutation(data->data().getLatestVersion(), clearRange, range, data->updateEagerReads);
data->newestAvailableVersion.insert(range, latestVersion);
setAvailableStatus(data, range, true);
}
validate(data);
}
@ -3513,8 +3551,8 @@ private:
// the data for change.version-1 (changes from versions < change.version)
// If emptyRange, treat the shard as empty, see removeKeysFromFailedServer() for more details about this
// scenario.
const Version shardVersion = (emptyRange && nowAssigned) ? 0 : currentVersion - 1;
changeServerKeys(data, keys, nowAssigned, shardVersion, CSK_UPDATE);
const ChangeServerKeysContext context = emptyRange ? CSK_ASSIGN_EMPTY : CSK_UPDATE;
changeServerKeys(data, keys, nowAssigned, currentVersion - 1, context);
}
processedStartKey = false;
@ -3548,6 +3586,7 @@ private:
data->recoveryVersionSkips.emplace_back(rollbackVersion, currentVersion - rollbackVersion);
} else if (m.type == MutationRef::SetValue && m.param1 == killStoragePrivateKey) {
TraceEvent("StorageServerWorkerRemoved", data->thisServerID).detail("Reason", "KillStorage");
throw worker_removed();
} else if ((m.type == MutationRef::SetValue || m.type == MutationRef::ClearRange) &&
m.param1.substr(1).startsWith(serverTagPrefix)) {
@ -3557,6 +3596,10 @@ private:
if ((m.type == MutationRef::SetValue && !data->isTss() && !matchesThisServer) ||
(m.type == MutationRef::ClearRange &&
((!data->isTSSInQuarantine() && matchesThisServer) || (data->isTss() && matchesTssPair)))) {
TraceEvent("StorageServerWorkerRemoved", data->thisServerID)
.detail("Reason", "ServerTag")
.detail("TagMatches", matchesThisServer)
.detail("IsTSS", data->isTss());
throw worker_removed();
}
if (!data->isTss() && m.type == MutationRef::ClearRange && data->ssPairID.present() &&
@ -3651,18 +3694,36 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
try {
// If we are disk bound and durableVersion is very old, we need to block updates or we could run out of memory
// This is often referred to as the storage server e-brake (emergency brake)
state double waitStartT = 0;
while (data->queueSize() >= SERVER_KNOBS->STORAGE_HARD_LIMIT_BYTES &&
data->durableVersion.get() < data->desiredOldestVersion.get()) {
if (now() - waitStartT >= 1) {
TraceEvent(SevWarn, "StorageServerUpdateLag", data->thisServerID)
.detail("Version", data->version.get())
.detail("DurableVersion", data->durableVersion.get());
waitStartT = now();
}
data->behind = true;
wait(delayJittered(.005, TaskPriority::TLogPeekReply));
// We allow the storage server to make some progress between e-brake periods, referreed to as "overage", in
// order to ensure that it advances desiredOldestVersion enough for updateStorage to make enough progress on
// freeing up queue size.
state double waitStartT = 0;
if (data->queueSize() >= SERVER_KNOBS->STORAGE_HARD_LIMIT_BYTES &&
data->durableVersion.get() < data->desiredOldestVersion.get() &&
((data->desiredOldestVersion.get() - SERVER_KNOBS->STORAGE_HARD_LIMIT_VERSION_OVERAGE >
data->lastDurableVersionEBrake) ||
(data->counters.bytesInput.getValue() - SERVER_KNOBS->STORAGE_HARD_LIMIT_BYTES_OVERAGE >
data->lastBytesInputEBrake))) {
while (data->queueSize() >= SERVER_KNOBS->STORAGE_HARD_LIMIT_BYTES &&
data->durableVersion.get() < data->desiredOldestVersion.get()) {
if (now() - waitStartT >= 1) {
TraceEvent(SevWarn, "StorageServerUpdateLag", data->thisServerID)
.detail("Version", data->version.get())
.detail("DurableVersion", data->durableVersion.get())
.detail("DesiredOldestVersion", data->desiredOldestVersion.get())
.detail("QueueSize", data->queueSize())
.detail("LastBytesInputEBrake", data->lastBytesInputEBrake)
.detail("LastDurableVersionEBrake", data->lastDurableVersionEBrake);
waitStartT = now();
}
data->behind = true;
wait(delayJittered(.005, TaskPriority::TLogPeekReply));
}
data->lastBytesInputEBrake = data->counters.bytesInput.getValue();
data->lastDurableVersionEBrake = data->durableVersion.get();
}
if (g_network->isSimulated() && data->isTss() && g_simulator.tssMode == ISimulator::TSSMode::EnabledAddDelay &&
@ -3696,6 +3757,7 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
}
data->tlogCursorReadsLatencyHistogram->sampleSeconds(now() - beforeTLogCursorReads);
if (cursor->popped() > 0) {
TraceEvent("StorageServerWorkerRemoved", data->thisServerID).detail("Reason", "PeekPoppedTLogData");
throw worker_removed();
}
@ -4997,7 +5059,7 @@ ACTOR Future<Void> reportStorageServerState(StorageServer* self) {
level = SevWarnAlways;
}
TraceEvent(level, "FetchKeyCurrentStatus")
TraceEvent(level, "FetchKeysCurrentStatus", self->thisServerID)
.detail("Timestamp", now())
.detail("LongestRunningTime", longestRunningFetchKeys.first)
.detail("StartKey", longestRunningFetchKeys.second.begin)
@ -5149,13 +5211,13 @@ bool storageServerTerminated(StorageServer& self, IKeyValueStore* persistentData
return false;
}
ACTOR Future<Void> memoryStoreRecover(IKeyValueStore* store, Reference<ClusterConnectionFile> connFile, UID id) {
if (store->getType() != KeyValueStoreType::MEMORY || connFile.getPtr() == nullptr) {
ACTOR Future<Void> memoryStoreRecover(IKeyValueStore* store, Reference<IClusterConnectionRecord> connRecord, UID id) {
if (store->getType() != KeyValueStoreType::MEMORY || connRecord.getPtr() == nullptr) {
return Never();
}
// create a temp client connect to DB
Database cx = Database::createDatabase(connFile, Database::API_VERSION_LATEST);
Database cx = Database::createDatabase(connRecord, Database::API_VERSION_LATEST);
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
state int noCanRemoveCount = 0;
@ -5357,6 +5419,7 @@ ACTOR Future<Void> replaceTSSInterface(StorageServer* self, StorageServerInterfa
if (!pairTagValue.present()) {
TEST(true); // Race where tss was down, pair was removed, tss starts back up
TraceEvent("StorageServerWorkerRemoved", self->thisServerID).detail("Reason", "TssPairMissing");
throw worker_removed();
}
@ -5388,7 +5451,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
Reference<AsyncVar<ServerDBInfo> const> db,
std::string folder,
Promise<Void> recovered,
Reference<ClusterConnectionFile> connFile) {
Reference<IClusterConnectionRecord> connRecord) {
state StorageServer self(persistentData, db, ssi);
self.folder = folder;
@ -5402,7 +5465,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
// for memory storage engine type, wait until recovery is done before commit
when(wait(self.storage.commit())) {}
when(wait(memoryStoreRecover(persistentData, connFile, self.thisServerID))) {
when(wait(memoryStoreRecover(persistentData, connRecord, self.thisServerID))) {
TraceEvent("DisposeStorageServer", self.thisServerID).log();
throw worker_removed();
}

View File

@ -599,7 +599,7 @@ ACTOR Future<Void> runWorkloadAsync(Database cx,
}
ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,
Reference<ClusterConnectionFile> ccf,
Reference<IClusterConnectionRecord> ccr,
Reference<AsyncVar<struct ServerDBInfo> const> dbInfo,
LocalityData locality) {
state WorkloadInterface workIface;
@ -614,7 +614,7 @@ ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,
startRole(Role::TESTER, workIface.id(), UID(), details);
if (work.useDatabase) {
cx = Database::createDatabase(ccf, -1, IsInternal::True, locality);
cx = Database::createDatabase(ccr, -1, IsInternal::True, locality);
wait(delay(1.0));
}
@ -658,7 +658,7 @@ ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,
}
ACTOR Future<Void> testerServerCore(TesterInterface interf,
Reference<ClusterConnectionFile> ccf,
Reference<IClusterConnectionRecord> ccr,
Reference<AsyncVar<struct ServerDBInfo> const> dbInfo,
LocalityData locality) {
state PromiseStream<Future<Void>> addWorkload;
@ -668,7 +668,7 @@ ACTOR Future<Void> testerServerCore(TesterInterface interf,
loop choose {
when(wait(workerFatalError)) {}
when(WorkloadRequest work = waitNext(interf.recruitments.getFuture())) {
addWorkload.send(testerServerWorkload(work, ccf, dbInfo, locality));
addWorkload.send(testerServerWorkload(work, ccr, dbInfo, locality));
}
}
}
@ -1583,8 +1583,8 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
* functionality. Its main purpose is to generate the test specification from passed arguments and then call into the
* correct actor which will orchestrate the actual test.
*
* \param connFile A cluster connection file. Not all tests require a functional cluster but all tests require
* a cluster file.
* \param connRecord A cluster connection record. Not all tests require a functional cluster but all tests require
* a cluster record.
* \param whatToRun TEST_TYPE_FROM_FILE to read the test description from a passed toml file or
* TEST_TYPE_CONSISTENCY_CHECK to generate a test spec for consistency checking
* \param at TEST_HERE: this process will act as a test client and execute the given workload. TEST_ON_SERVERS: Run a
@ -1600,7 +1600,7 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
*
* \returns A future which will be set after all tests finished.
*/
ACTOR Future<Void> runTests(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> runTests(Reference<IClusterConnectionRecord> connRecord,
test_type_t whatToRun,
test_location_t at,
int minTestersExpected,
@ -1612,8 +1612,8 @@ ACTOR Future<Void> runTests(Reference<ClusterConnectionFile> connFile,
auto cc = makeReference<AsyncVar<Optional<ClusterControllerFullInterface>>>();
auto ci = makeReference<AsyncVar<Optional<ClusterInterface>>>();
std::vector<Future<Void>> actors;
if (connFile) {
actors.push_back(reportErrors(monitorLeader(connFile, cc), "MonitorLeader"));
if (connRecord) {
actors.push_back(reportErrors(monitorLeader(connRecord, cc), "MonitorLeader"));
actors.push_back(reportErrors(extractClusterInterface(cc, ci), "ExtractClusterInterface"));
}
@ -1688,7 +1688,7 @@ ACTOR Future<Void> runTests(Reference<ClusterConnectionFile> connFile,
std::vector<TesterInterface> iTesters(1);
actors.push_back(
reportErrors(monitorServerDBInfo(cc, LocalityData(), db), "MonitorServerDBInfo")); // FIXME: Locality
actors.push_back(reportErrors(testerServerCore(iTesters[0], connFile, db, locality), "TesterServerCore"));
actors.push_back(reportErrors(testerServerCore(iTesters[0], connRecord, db, locality), "TesterServerCore"));
tests = runTests(cc, ci, iTesters, testSpecs, startingConfiguration, locality);
} else {
tests = reportErrors(runTests(cc, ci, testSpecs, at, minTestersExpected, startingConfiguration, locality),

View File

@ -518,7 +518,7 @@ ACTOR Future<Void> registrationClient(Reference<AsyncVar<Optional<ClusterControl
Reference<AsyncVar<Optional<DataDistributorInterface>> const> ddInterf,
Reference<AsyncVar<Optional<RatekeeperInterface>> const> rkInterf,
Reference<AsyncVar<bool> const> degraded,
Reference<ClusterConnectionFile> connFile,
Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<std::set<std::string>> const> issues,
Reference<LocalConfiguration> localConfig) {
// Keeps the cluster controller (as it may be re-elected) informed that this worker exists
@ -532,6 +532,16 @@ ACTOR Future<Void> registrationClient(Reference<AsyncVar<Optional<ClusterControl
state Future<Void> cacheErrorsFuture;
state Optional<double> incorrectTime;
loop {
state ClusterConnectionString storedConnectionString;
state bool upToDate = true;
if (connRecord) {
bool upToDateResult = wait(connRecord->upToDate(storedConnectionString));
upToDate = upToDateResult;
}
if (upToDate) {
incorrectTime = Optional<double>();
}
RegisterWorkerRequest request(interf,
initialClass,
processClass,
@ -542,28 +552,25 @@ ACTOR Future<Void> registrationClient(Reference<AsyncVar<Optional<ClusterControl
degraded->get(),
localConfig->lastSeenVersion(),
localConfig->configClassSet());
for (auto const& i : issues->get()) {
request.issues.push_back_deep(request.issues.arena(), i);
}
ClusterConnectionString fileConnectionString;
if (connFile && !connFile->fileContentsUpToDate(fileConnectionString)) {
if (!upToDate) {
request.issues.push_back_deep(request.issues.arena(), LiteralStringRef("incorrect_cluster_file_contents"));
std::string connectionString = connFile->getConnectionString().toString();
std::string connectionString = connRecord->getConnectionString().toString();
if (!incorrectTime.present()) {
incorrectTime = now();
}
if (connFile->canGetFilename()) {
// Don't log a SevWarnAlways initially to account for transient issues (e.g. someone else changing the
// file right before us)
TraceEvent(now() - incorrectTime.get() > 300 ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents")
.detail("Filename", connFile->getFilename())
.detail("ConnectionStringFromFile", fileConnectionString.toString())
.detail("CurrentConnectionString", connectionString);
}
} else {
incorrectTime = Optional<double>();
}
// Don't log a SevWarnAlways initially to account for transient issues (e.g. someone else changing
// the file right before us)
TraceEvent(now() - incorrectTime.get() > 300 ? SevWarnAlways : SevWarn, "IncorrectClusterFileContents")
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", storedConnectionString.toString())
.detail("CurrentConnectionString", connectionString);
}
auto peers = FlowTransport::transport().getIncompatiblePeers();
for (auto it = peers->begin(); it != peers->end();) {
if (now() - it->second.second > FLOW_KNOBS->INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING) {
@ -1095,7 +1102,7 @@ ACTOR Future<Void> storageServerRollbackRebooter(std::set<std::pair<UID, KeyValu
DUMPTOKEN(recruited.getKeyValuesStream);
prevStorageServer =
storageServer(store, recruited, db, folder, Promise<Void>(), Reference<ClusterConnectionFile>(nullptr));
storageServer(store, recruited, db, folder, Promise<Void>(), Reference<IClusterConnectionRecord>(nullptr));
prevStorageServer = handleIOErrors(prevStorageServer, store, id, store->onClosed());
}
}
@ -1308,7 +1315,7 @@ struct SharedLogsValue {
: actor(actor), uid(uid), requests(requests) {}
};
ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,
LocalityData locality,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
@ -1383,7 +1390,7 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
errorForwarders.add(loadedPonger(interf.debugPing.getFuture()));
errorForwarders.add(waitFailureServer(interf.waitFailure.getFuture()));
errorForwarders.add(monitorTraceLogIssues(issues));
errorForwarders.add(testerServerCore(interf.testerInterface, connFile, dbInfo, locality));
errorForwarders.add(testerServerCore(interf.testerInterface, connRecord, dbInfo, locality));
errorForwarders.add(monitorHighMemory(memoryProfileThreshold));
filesClosed.add(stopping.getFuture());
@ -1464,7 +1471,7 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
DUMPTOKEN(recruited.getKeyValuesStream);
Promise<Void> recovery;
Future<Void> f = storageServer(kv, recruited, dbInfo, folder, recovery, connFile);
Future<Void> f = storageServer(kv, recruited, dbInfo, folder, recovery, connRecord);
recoveries.push_back(recovery.getFuture());
f = handleIOErrors(f, kv, s.storeID, kvClosed);
f = storageServerRollbackRebooter(&runningStorages,
@ -1590,7 +1597,7 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
ddInterf,
rkInterf,
degraded,
connFile,
connRecord,
issues,
localConfig));
@ -1705,7 +1712,7 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
// printf("Recruited as masterServer\n");
Future<Void> masterProcess = masterServer(
recruited, dbInfo, ccInterface, ServerCoordinators(connFile), req.lifetime, req.forceRecovery);
recruited, dbInfo, ccInterface, ServerCoordinators(connRecord), req.lifetime, req.forceRecovery);
errorForwarders.add(
zombie(recruited, forwardError(errors, Role::MASTER, recruited.id(), masterProcess)));
req.reply.send(recruited);
@ -2293,10 +2300,10 @@ ACTOR Future<UID> createAndLockProcessIdFile(std::string folder) {
}
ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGeneration(
Reference<ClusterConnectionFile> connFile,
Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> result,
MonitorLeaderInfo info) {
state ClusterConnectionString ccf = info.intermediateConnFile->getConnectionString();
state ClusterConnectionString ccf = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = ccf.coordinators();
state ElectionResultRequest request;
state int index = 0;
@ -2314,24 +2321,24 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
if (leader.present()) {
if (leader.get().present()) {
if (leader.get().get().forward) {
info.intermediateConnFile = makeReference<ClusterConnectionFile>(
connFile->getFilename(), ClusterConnectionString(leader.get().get().serializedInfo.toString()));
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(leader.get().get().serializedInfo.toString()));
return info;
}
if (connFile != info.intermediateConnFile) {
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("Filename", connFile->getFilename())
.detail("ConnectionStringFromFile", connFile->getConnectionString().toString())
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnFile->getConnectionString().toString());
info.intermediateConnRecord->getConnectionString().toString());
}
connFile->setConnectionString(info.intermediateConnFile->getConnectionString());
info.intermediateConnFile = connFile;
connRecord->setConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
info.hasConnected = true;
connFile->notifyConnected();
connRecord->notifyConnected();
request.knownLeader = leader.get().get().changeID;
ClusterControllerPriorityInfo info = leader.get().get().getPriorityInfo();
@ -2354,35 +2361,35 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
}
}
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
state MonitorLeaderInfo info(connFile);
state MonitorLeaderInfo info(connRecord);
loop {
MonitorLeaderInfo _info =
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connFile, outSerializedLeaderInfo, info));
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
info = _info;
}
}
template <class LeaderInterface>
Future<Void> monitorLeaderWithDelayedCandidacyImpl(
Reference<ClusterConnectionFile> const& connFile,
Reference<IClusterConnectionRecord> const& connRecord,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader) {
LeaderDeserializer<LeaderInterface> deserializer;
auto serializedInfo = makeReference<AsyncVar<Value>>();
Future<Void> m = monitorLeaderWithDelayedCandidacyImplInternal(connFile, serializedInfo);
Future<Void> m = monitorLeaderWithDelayedCandidacyImplInternal(connRecord, serializedInfo);
return m || deserializer(serializedInfo, outKnownLeader);
}
ACTOR Future<Void> monitorLeaderWithDelayedCandidacy(
Reference<ClusterConnectionFile> connFile,
Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
Future<Void> recoveredDiskFiles,
LocalityData locality,
Reference<AsyncVar<ServerDBInfo>> dbInfo,
ConfigDBType configDBType) {
state Future<Void> monitor = monitorLeaderWithDelayedCandidacyImpl(connFile, currentCC);
state Future<Void> monitor = monitorLeaderWithDelayedCandidacyImpl(connRecord, currentCC);
state Future<Void> timeout;
wait(recoveredDiskFiles);
@ -2408,7 +2415,7 @@ ACTOR Future<Void> monitorLeaderWithDelayedCandidacy(
when(wait(timeout.isValid() ? timeout : Never())) {
monitor.cancel();
wait(clusterController(
connFile, currentCC, asyncPriorityInfo, recoveredDiskFiles, locality, configDBType));
connRecord, currentCC, asyncPriorityInfo, recoveredDiskFiles, locality, configDBType));
return Void();
}
}
@ -2458,7 +2465,7 @@ ACTOR Future<Void> serveProcess() {
}
}
ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
LocalityData localities,
ProcessClass processClass,
std::string dataFolder,
@ -2489,7 +2496,7 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
actors.push_back(serveProcess());
try {
ServerCoordinators coordinators(connFile);
ServerCoordinators coordinators(connRecord);
if (g_network->isSimulated()) {
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
}
@ -2506,7 +2513,7 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
if (coordFolder.size()) {
// SOMEDAY: remove the fileNotFound wrapper and make DiskQueue construction safe from errors setting up
// their files
actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder, coordinators.ccf, configDBType)));
actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder, coordinators.ccr, configDBType)));
}
state UID processIDUid = wait(createAndLockProcessIdFile(dataFolder));
@ -2523,21 +2530,25 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
actors.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo),
"MonitorAndWriteCCPriorityInfo"));
if (processClass.machineClassFitness(ProcessClass::ClusterController) == ProcessClass::NeverAssign) {
actors.push_back(reportErrors(monitorLeader(connFile, cc), "ClusterController"));
actors.push_back(reportErrors(monitorLeader(connRecord, cc), "ClusterController"));
} else if (processClass.machineClassFitness(ProcessClass::ClusterController) == ProcessClass::WorstFit &&
SERVER_KNOBS->MAX_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS > 0) {
actors.push_back(reportErrors(
monitorLeaderWithDelayedCandidacy(
connFile, cc, asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities, dbInfo, configDBType),
"ClusterController"));
actors.push_back(reportErrors(monitorLeaderWithDelayedCandidacy(connRecord,
cc,
asyncPriorityInfo,
recoveredDiskFiles.getFuture(),
localities,
dbInfo,
configDBType),
"ClusterController"));
} else {
actors.push_back(reportErrors(
clusterController(
connFile, cc, asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities, configDBType),
connRecord, cc, asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities, configDBType),
"ClusterController"));
}
actors.push_back(reportErrors(extractClusterInterface(cc, ci), "ExtractClusterInterface"));
actors.push_back(reportErrorsExcept(workerServer(connFile,
actors.push_back(reportErrorsExcept(workerServer(connRecord,
cc,
localities,
asyncPriorityInfo,

View File

@ -23,6 +23,7 @@
#pragma once
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/ThreadSafeTransaction.h"
#include "fdbserver/workloads/MemoryKeyValueStore.h"
@ -239,7 +240,7 @@ struct ApiWorkload : TestWorkload {
useExtraDB = g_simulator.extraDB != nullptr;
if (useExtraDB) {
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
}
}

View File

@ -20,6 +20,7 @@
#include "fdbrpc/simulator.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -38,7 +39,7 @@ struct AtomicSwitchoverWorkload : TestWorkload {
backupRanges.push_back_deep(backupRanges.arena(), normalKeys);
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
}

View File

@ -19,6 +19,7 @@
*/
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
@ -35,7 +36,7 @@ struct BackupToDBAbort : TestWorkload {
backupRanges.push_back_deep(backupRanges.arena(), normalKeys);
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
lockid = UID(0xbeeffeed, 0xdecaf00d);

View File

@ -20,6 +20,7 @@
#include "fdbrpc/simulator.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -127,7 +128,7 @@ struct BackupToDBCorrectnessWorkload : TestWorkload {
}
}
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
TraceEvent("BARW_Start").detail("Locked", locked);

View File

@ -20,6 +20,7 @@
#include "fdbrpc/simulator.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "fdbclient/ManagementAPI.actor.h"
@ -75,7 +76,7 @@ struct BackupToDBUpgradeWorkload : TestWorkload {
}
}
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
TraceEvent("DRU_Start").log();

View File

@ -19,6 +19,7 @@
*/
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbclient/ManagementAPI.actor.h"
@ -56,7 +57,7 @@ struct ChangeConfigWorkload : TestWorkload {
// for the extra cluster.
ACTOR Future<Void> extraDatabaseConfigure(ChangeConfigWorkload* self) {
if (g_network->isSimulated() && g_simulator.extraDB) {
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
state Database extraDB = Database::createDatabase(extraFile, -1);
wait(delay(5 * deterministicRandom()->random01()));

View File

@ -0,0 +1,256 @@
/*
* DataLossRecovery.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdint>
#include <limits>
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbserver/MoveKeys.actor.h"
#include "fdbserver/QuietDatabase.h"
#include "fdbrpc/simulator.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/flow.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace {
std::string printValue(const ErrorOr<Optional<Value>>& value) {
if (value.isError()) {
return value.getError().name();
}
return value.get().present() ? value.get().get().toString() : "Value Not Found.";
}
} // namespace
// Workload that verifies the cluster recovers availability of a key range after
// the entire (single-server) team holding it is killed: it pins a shard onto one
// randomly chosen storage server, kills that process, then excludes it as failed
// and checks that reads and writes on the range work again.
struct DataLossRecoveryWorkload : TestWorkload {
// Throttle locks passed to moveKeys(); initialized with a single permit each.
FlowLock startMoveKeysParallelismLock;
FlowLock finishMoveKeysParallelismLock;
// Only the client with clientId == 0 runs the test body.
const bool enabled;
// Overall test verdict; cleared by validationFailed() and reported by check().
bool pass;
NetworkAddress addr;
DataLossRecoveryWorkload(WorkloadContext const& wcx)
: TestWorkload(wcx), startMoveKeysParallelismLock(1), finishMoveKeysParallelismLock(1), enabled(!clientId),
pass(true) {}
// Record a read-validation mismatch: log expected vs. actual and mark the test failed.
void validationFailed(ErrorOr<Optional<Value>> expectedValue, ErrorOr<Optional<Value>> actualValue) {
TraceEvent(SevError, "TestFailed")
.detail("ExpectedValue", printValue(expectedValue))
.detail("ActualValue", printValue(actualValue));
pass = false;
}
std::string description() const override { return "DataLossRecovery"; }
Future<Void> setup(Database const& cx) override { return Void(); }
Future<Void> start(Database const& cx) override {
if (!enabled) {
return Void();
}
return _start(this, cx);
}
// Main test sequence: write -> pin shard to one SS -> kill it -> expect timeout
// -> re-enable DD and exclude the dead server -> expect data loss (not found)
// -> verify new writes succeed on the re-replicated range.
ACTOR Future<Void> _start(DataLossRecoveryWorkload* self, Database cx) {
state Key key = "TestKey"_sr;
state Key endKey = "TestKey0"_sr;
state Value oldValue = "TestValue"_sr;
state Value newValue = "TestNewValue"_sr;
wait(self->writeAndVerify(self, cx, key, oldValue));
// Move [key, endKey) to team: {address}.
state NetworkAddress address = wait(self->disableDDAndMoveShard(self, cx, KeyRangeRef(key, endKey)));
wait(self->readAndVerify(self, cx, key, oldValue));
// Kill team {address}, and expect read to timeout.
self->killProcess(self, address);
wait(self->readAndVerify(self, cx, key, timed_out()));
// Reenable DD and exclude address as fail, so that [key, endKey) will be dropped and moved to a new team.
// Expect read to return 'value not found'.
int ignore = wait(setDDMode(cx, 1));
wait(self->exclude(cx, address));
wait(self->readAndVerify(self, cx, key, Optional<Value>()));
// Write will succeed.
wait(self->writeAndVerify(self, cx, key, newValue));
return Void();
}
// Read `key` with a 30s timeout and compare against `expectedValue`, which may
// itself be an error (e.g. timed_out()) when a failure is the expected outcome.
// A mismatch calls validationFailed(); an expected error breaks out cleanly.
ACTOR Future<Void> readAndVerify(DataLossRecoveryWorkload* self,
Database cx,
Key key,
ErrorOr<Optional<Value>> expectedValue) {
state Transaction tr(cx);
loop {
try {
state Optional<Value> res = wait(timeoutError(tr.get(key), 30.0));
const bool equal = !expectedValue.isError() && res == expectedValue.get();
if (!equal) {
self->validationFailed(expectedValue, ErrorOr<Optional<Value>>(res));
}
break;
} catch (Error& e) {
if (expectedValue.isError() && expectedValue.getError().code() == e.code()) {
break;
}
wait(tr.onError(e));
}
}
return Void();
}
// Set (or clear, when `value` is absent) `key` with a 30s commit timeout, then
// read it back via readAndVerify() to confirm the write took effect.
ACTOR Future<Void> writeAndVerify(DataLossRecoveryWorkload* self, Database cx, Key key, Optional<Value> value) {
state Transaction tr(cx);
loop {
try {
if (value.present()) {
tr.set(key, value.get());
} else {
tr.clear(key);
}
wait(timeoutError(tr.commit(), 30.0));
break;
} catch (Error& e) {
wait(tr.onError(e));
}
}
wait(self->readAndVerify(self, cx, key, value));
return Void();
}
// Exclude `addr` as failed and block until data distribution has moved all
// data off it (checkForExcludingServers with waitForAllExcluded = true).
ACTOR Future<Void> exclude(Database cx, NetworkAddress addr) {
state Transaction tr(cx);
state std::vector<AddressExclusion> servers;
servers.push_back(AddressExclusion(addr.ip, addr.port));
loop {
try {
excludeServers(tr, servers, true);
wait(tr.commit());
break;
} catch (Error& e) {
wait(tr.onError(e));
}
}
// Wait until all data are moved out of servers.
std::set<NetworkAddress> inProgress = wait(checkForExcludingServers(cx, servers, true));
ASSERT(inProgress.empty());
TraceEvent("ExcludedFailedServer").detail("Address", addr.toString());
return Void();
}
// Move keys to a random selected team consisting of a single SS, after disabling DD, so that keys won't be
// kept in the new team until DD is enabled.
// Returns the address of the single SS of the new team.
ACTOR Future<NetworkAddress> disableDDAndMoveShard(DataLossRecoveryWorkload* self, Database cx, KeyRange keys) {
// Disable DD to avoid DD undoing of our move.
state int ignore = wait(setDDMode(cx, 0));
state NetworkAddress addr;
// Pick a random SS as the dest, keys will reside on a single server after the move.
// Skip simulator-protected processes so the later kill is permitted.
state std::vector<UID> dest;
while (dest.empty()) {
std::vector<StorageServerInterface> interfs = wait(getStorageServers(cx));
if (!interfs.empty()) {
const auto& interf = interfs[deterministicRandom()->randomInt(0, interfs.size())];
if (g_simulator.protectedAddresses.count(interf.address()) == 0) {
dest.push_back(interf.uniqueID);
addr = interf.address();
}
}
}
state UID owner = deterministicRandom()->randomUniqueID();
state DDEnabledState ddEnabledState;
state Transaction tr(cx);
loop {
try {
// Take over the moveKeysLock by writing our own owner id, then perform
// the move with both src and dest set to the single chosen server.
BinaryWriter wrMyOwner(Unversioned());
wrMyOwner << owner;
tr.set(moveKeysLockOwnerKey, wrMyOwner.toValue());
wait(tr.commit());
MoveKeysLock moveKeysLock;
moveKeysLock.myOwner = owner;
wait(moveKeys(cx,
keys,
dest,
dest,
moveKeysLock,
Promise<Void>(),
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
false,
UID(), // for logging only
&ddEnabledState));
break;
} catch (Error& e) {
if (e.code() == error_code_movekeys_conflict) {
// Conflict on moveKeysLocks with the current running DD is expected, just retry.
tr.reset();
} else {
wait(tr.onError(e));
}
}
}
TraceEvent("TestKeyMoved").detail("NewTeam", describe(dest)).detail("Address", addr.toString());
// Confirm the range is now served by exactly the one chosen address.
state Transaction validateTr(cx);
loop {
try {
Standalone<VectorRef<const char*>> addresses = wait(validateTr.getAddressesForKey(keys.begin));
// The move function is not what we are testing here, crash the test if the move fails.
ASSERT(addresses.size() == 1);
ASSERT(std::string(addresses[0]) == addr.toString());
break;
} catch (Error& e) {
wait(validateTr.onError(e));
}
}
return addr;
}
// Instantly kill the simulated process serving `addr`.
void killProcess(DataLossRecoveryWorkload* self, const NetworkAddress& addr) {
ISimulator::ProcessInfo* process = g_simulator.getProcessByAddress(addr);
ASSERT(process->addresses.contains(addr));
g_simulator.killProcess(process, ISimulator::KillInstantly);
TraceEvent("TestTeamKilled").detail("Address", addr);
}
Future<bool> check(Database const& cx) override { return pass; }
void getMetrics(std::vector<PerfMetric>& m) override {}
};
WorkloadFactory<DataLossRecoveryWorkload> DataLossRecoveryWorkloadFactory("DataLossRecovery");

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/RunTransaction.actor.h"
#include "fdbrpc/simulator.h"
@ -37,7 +38,7 @@ struct DifferentClustersSameRVWorkload : TestWorkload {
DifferentClustersSameRVWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
ASSERT(g_simulator.extraDB != nullptr);
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
testDuration = getOption(options, LiteralStringRef("testDuration"), 100.0);
switchAfter = getOption(options, LiteralStringRef("switchAfter"), 50.0);
@ -53,7 +54,7 @@ struct DifferentClustersSameRVWorkload : TestWorkload {
if (clientId != 0) {
return Void();
}
auto switchConnFileDb = Database::createDatabase(cx->getConnectionFile(), -1);
auto switchConnFileDb = Database::createDatabase(cx->getConnectionRecord(), -1);
originalDB = cx;
std::vector<Future<Void>> clients = { readerClientSeparateDBs(cx, this),
doSwitch(switchConnFileDb, this),
@ -141,8 +142,8 @@ struct DifferentClustersSameRVWorkload : TestWorkload {
TraceEvent("DifferentClusters_CopiedDatabase").log();
wait(advanceVersion(self->extraDB, rv));
TraceEvent("DifferentClusters_AdvancedVersion").log();
wait(cx->switchConnectionFile(
makeReference<ClusterConnectionFile>(self->extraDB->getConnectionFile()->getConnectionString())));
wait(cx->switchConnectionRecord(
makeReference<ClusterConnectionMemoryRecord>(self->extraDB->getConnectionRecord()->getConnectionString())));
TraceEvent("DifferentClusters_SwitchedConnectionFile").log();
state Transaction tr(cx);
tr.setVersion(rv);
@ -156,7 +157,7 @@ struct DifferentClustersSameRVWorkload : TestWorkload {
TraceEvent("DifferentClusters_ReadError").error(e);
wait(tr.onError(e));
}
// In an actual switch we would call switchConnectionFile after unlocking the database. But it's possible
// In an actual switch we would call switchConnectionRecord after unlocking the database. But it's possible
// that a storage server serves a read at |rv| even after the recovery caused by unlocking the database, and we
// want to make that more likely for this test. So read at |rv| then unlock.
wait(unlockDatabase(self->extraDB, lockUid));

View File

@ -101,7 +101,7 @@ struct KillRegionWorkload : TestWorkload {
TraceEvent("ForceRecovery_Begin").log();
wait(forceRecovery(cx->getConnectionFile(), LiteralStringRef("1")));
wait(forceRecovery(cx->getConnectionRecord(), LiteralStringRef("1")));
TraceEvent("ForceRecovery_UsableRegions").log();

View File

@ -19,6 +19,7 @@
*/
#include "fdbrpc/ContinuousSample.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
@ -150,7 +151,7 @@ struct VersionStampWorkload : TestWorkload {
ACTOR Future<bool> _check(Database cx, VersionStampWorkload* self) {
if (self->validateExtraDB) {
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
cx = Database::createDatabase(extraFile, -1);
}
state ReadYourWritesTransaction tr(cx);
@ -309,7 +310,7 @@ struct VersionStampWorkload : TestWorkload {
state Database extraDB;
if (g_simulator.extraDB != nullptr) {
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
}

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbclient/ReadYourWrites.h"
@ -81,7 +82,7 @@ struct WriteDuringReadWorkload : TestWorkload {
useExtraDB = g_simulator.extraDB != nullptr;
if (useExtraDB) {
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(*g_simulator.extraDB);
extraDB = Database::createDatabase(extraFile, -1);
useSystemKeys = false;
}

View File

@ -57,6 +57,7 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) {
init( SLOWTASK_PROFILING_LOG_INTERVAL, 0 ); // A value of 0 means use RUN_LOOP_PROFILING_INTERVAL
init( SLOWTASK_PROFILING_MAX_LOG_INTERVAL, 1.0 );
init( SLOWTASK_PROFILING_LOG_BACKOFF, 2.0 );
init( SLOWTASK_BLOCKED_INTERVAL, 60.0 );
init( SATURATION_PROFILING_LOG_INTERVAL, 0.5 ); // A value of 0 means use RUN_LOOP_PROFILING_INTERVAL
init( SATURATION_PROFILING_MAX_LOG_INTERVAL, 5.0 );
init( SATURATION_PROFILING_LOG_BACKOFF, 2.0 );
@ -67,7 +68,8 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) {
init( HUGE_ARENA_LOGGING_INTERVAL, 5.0 );
init( WRITE_TRACING_ENABLED, true ); if( randomize && BUGGIFY ) WRITE_TRACING_ENABLED = false;
init( TRACING_UDP_LISTENER_PORT, 8889 ); // Only applicable if TracerType is set to a network option.
init( TRACING_SAMPLE_RATE, 1.0 ); // Fraction of traces (not spans) to sample (0 means ignore all traces)
init( TRACING_UDP_LISTENER_PORT, 8889 ); // Only applicable if TracerType is set to a network option
//connectionMonitor
init( CONNECTION_MONITOR_LOOP_TIME, isSimulated ? 0.75 : 1.0 ); if( randomize && BUGGIFY ) CONNECTION_MONITOR_LOOP_TIME = 6.0;

View File

@ -117,6 +117,7 @@ public:
double HUGE_ARENA_LOGGING_INTERVAL;
bool WRITE_TRACING_ENABLED;
double TRACING_SAMPLE_RATE;
int TRACING_UDP_LISTENER_PORT;
// run loop profiling
@ -124,6 +125,7 @@ public:
double SLOWTASK_PROFILING_LOG_INTERVAL;
double SLOWTASK_PROFILING_MAX_LOG_INTERVAL;
double SLOWTASK_PROFILING_LOG_BACKOFF;
double SLOWTASK_BLOCKED_INTERVAL;
double SATURATION_PROFILING_LOG_INTERVAL;
double SATURATION_PROFILING_MAX_LOG_INTERVAL;
double SATURATION_PROFILING_LOG_BACKOFF;

View File

@ -3581,8 +3581,10 @@ void* checkThread(void* arg) {
int64_t lastRunLoopIterations = net2RunLoopIterations.load();
int64_t lastRunLoopSleeps = net2RunLoopSleeps.load();
double slowTaskStart = 0;
double lastSlowTaskSignal = 0;
double lastSaturatedSignal = 0;
double lastSlowTaskBlockedLog = 0;
const double minSlowTaskLogInterval =
std::max(FLOW_KNOBS->SLOWTASK_PROFILING_LOG_INTERVAL, FLOW_KNOBS->RUN_LOOP_PROFILING_INTERVAL);
@ -3603,7 +3605,19 @@ void* checkThread(void* arg) {
if (slowTask) {
double t = timer();
if (lastSlowTaskSignal == 0 || t - lastSlowTaskSignal >= slowTaskLogInterval) {
bool newSlowTask = lastSlowTaskSignal == 0;
if (newSlowTask) {
slowTaskStart = t;
} else if (t - std::max(slowTaskStart, lastSlowTaskBlockedLog) > FLOW_KNOBS->SLOWTASK_BLOCKED_INTERVAL) {
lastSlowTaskBlockedLog = t;
// When this gets logged, it will be with a current timestamp (using timer()). If the network thread
// unblocks, it will log any slow task related events at an earlier timestamp. That means the order of
// events during this sequence will not match their timestamp order.
TraceEvent(SevWarnAlways, "RunLoopBlocked").detail("Duration", t - slowTaskStart);
}
if (newSlowTask || t - lastSlowTaskSignal >= slowTaskLogInterval) {
if (lastSlowTaskSignal > 0) {
slowTaskLogInterval = std::min(FLOW_KNOBS->SLOWTASK_PROFILING_MAX_LOG_INTERVAL,
FLOW_KNOBS->SLOWTASK_PROFILING_LOG_BACKOFF * slowTaskLogInterval);
@ -3614,6 +3628,7 @@ void* checkThread(void* arg) {
pthread_kill(mainThread, SIGPROF);
}
} else {
slowTaskStart = 0;
lastSlowTaskSignal = 0;
lastRunLoopIterations = currentRunLoopIterations;
slowTaskLogInterval = minSlowTaskLogInterval;

View File

@ -161,6 +161,39 @@ SystemStatistics customSystemMonitor(std::string const& eventName, StatisticsSta
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId);
uint64_t total_memory = 0;
total_memory += FastAllocator<16>::getTotalMemory();
total_memory += FastAllocator<32>::getTotalMemory();
total_memory += FastAllocator<64>::getTotalMemory();
total_memory += FastAllocator<96>::getTotalMemory();
total_memory += FastAllocator<128>::getTotalMemory();
total_memory += FastAllocator<256>::getTotalMemory();
total_memory += FastAllocator<512>::getTotalMemory();
total_memory += FastAllocator<1024>::getTotalMemory();
total_memory += FastAllocator<2048>::getTotalMemory();
total_memory += FastAllocator<4096>::getTotalMemory();
total_memory += FastAllocator<8192>::getTotalMemory();
uint64_t unused_memory = 0;
unused_memory += FastAllocator<16>::getApproximateMemoryUnused();
unused_memory += FastAllocator<32>::getApproximateMemoryUnused();
unused_memory += FastAllocator<64>::getApproximateMemoryUnused();
unused_memory += FastAllocator<96>::getApproximateMemoryUnused();
unused_memory += FastAllocator<128>::getApproximateMemoryUnused();
unused_memory += FastAllocator<256>::getApproximateMemoryUnused();
unused_memory += FastAllocator<512>::getApproximateMemoryUnused();
unused_memory += FastAllocator<1024>::getApproximateMemoryUnused();
unused_memory += FastAllocator<2048>::getApproximateMemoryUnused();
unused_memory += FastAllocator<4096>::getApproximateMemoryUnused();
unused_memory += FastAllocator<8192>::getApproximateMemoryUnused();
if (total_memory > 0) {
TraceEvent("FastAllocMemoryUsage")
.detail("TotalMemory", total_memory)
.detail("UnusedMemory", unused_memory)
.detail("Utilization", format("%f%%", (total_memory - unused_memory) * 100.0 / total_memory));
}
TraceEvent n("NetworkMetrics");
n.detail("Elapsed", currentStats.elapsed)
.detail("CantSleep", netData.countCantSleep - statState->networkState.countCantSleep)

View File

@ -373,7 +373,7 @@ void openTracer(TracerType type) {
ITracer::~ITracer() {}
Span& Span::operator=(Span&& o) {
if (begin > 0.0) {
if (begin > 0.0 && context.second() > 0) {
end = g_network->now();
g_tracer->trace(*this);
}
@ -388,7 +388,7 @@ Span& Span::operator=(Span&& o) {
}
Span::~Span() {
if (begin > 0.0) {
if (begin > 0.0 && context.second() > 0) {
end = g_network->now();
g_tracer->trace(*this);
}

View File

@ -37,11 +37,19 @@ struct Span {
Span(SpanID context, Location location, std::initializer_list<SpanID> const& parents = {})
: context(context), begin(g_network->now()), location(location), parents(arena, parents.begin(), parents.end()) {
if (parents.size() > 0) {
this->context = SpanID((*parents.begin()).first(), context.second());
// If the parents' token is 0 (meaning the trace should not be
// recorded), set the child token to 0 as well. Otherwise, use the
// existing (likely randomly generated) value.
uint64_t traceId = (*parents.begin()).second() > 0 ? context.second() : 0;
this->context = SpanID((*parents.begin()).first(), traceId);
}
}
Span(Location location, std::initializer_list<SpanID> const& parents = {})
: Span(deterministicRandom()->randomUniqueID(), location, parents) {}
Span(Location location, std::initializer_list<SpanID> const& parents = {}) {
uint64_t tokenId = deterministicRandom()->random01() < FLOW_KNOBS->TRACING_SAMPLE_RATE
? deterministicRandom()->randomUInt64()
: 0;
Span(UID(deterministicRandom()->randomUInt64(), tokenId), location, parents);
}
Span(Location location, SpanID context) : Span(location, { context }) {}
Span(const Span&) = delete;
Span(Span&& o) {
@ -70,12 +78,13 @@ struct Span {
void addParent(SpanID span) {
if (parents.size() == 0) {
uint64_t traceId = (*parents.begin()).second() > 0 ? context.second() : 0;
// Use first parent to set trace ID. This is non-ideal for spans
// with multiple parents, because the trace ID will associate the
// span with only one trace. A workaround is to look at the parent
// relationships instead of the trace ID. Another option in the
// future is to keep a list of trace IDs.
context = SpanID(span.first(), context.second());
context = SpanID(span.first(), traceId);
}
parents.push_back(arena, span);
}
@ -112,7 +121,7 @@ void openTracer(TracerType type);
template <class T>
struct SpannedDeque : Deque<T> {
Span span;
explicit SpannedDeque(Location loc) : span(deterministicRandom()->randomUniqueID(), loc) {}
explicit SpannedDeque(Location loc) : span(loc) {}
SpannedDeque(SpannedDeque&& other) : Deque<T>(std::move(other)), span(std::move(other.span)) {}
SpannedDeque(SpannedDeque const&) = delete;
SpannedDeque& operator=(SpannedDeque const&) = delete;

View File

@ -78,6 +78,7 @@ ERROR( wrong_connection_file, 1054, "Connection file mismatch")
ERROR( version_already_compacted, 1055, "The requested changes have been compacted away")
ERROR( local_config_changed, 1056, "Local configuration file has changed. Restart and apply these changes" )
ERROR( failed_to_reach_quorum, 1057, "Failed to reach quorum from configuration database nodes. Retry sending these requests" )
ERROR( wrong_format_version, 1058, "Format version not recognize." )
ERROR( broken_promise, 1100, "Broken promise" )
ERROR( operation_cancelled, 1101, "Asynchronous operation cancelled" )

View File

@ -51,9 +51,7 @@ LineageReference getCurrentLineage() {
}
return *currentLineage;
}
#endif
#ifdef ENABLE_SAMPLING
void sample(LineageReference* lineagePtr);
void replaceLineage(LineageReference* lineage) {

View File

@ -545,6 +545,11 @@ public:
LineageReference() : Reference<ActorLineage>(nullptr), actorName_(""), allocated_(false) {}
explicit LineageReference(ActorLineage* ptr) : Reference<ActorLineage>(ptr), actorName_(""), allocated_(false) {}
LineageReference(const LineageReference& r) : Reference<ActorLineage>(r), actorName_(""), allocated_(false) {}
LineageReference(LineageReference&& r)
: Reference<ActorLineage>(std::forward<LineageReference>(r)), actorName_(r.actorName_), allocated_(r.allocated_) {
r.actorName_ = "";
r.allocated_ = false;
}
void setActorName(const char* name) { actorName_ = name; }
const char* actorName() { return actorName_; }

View File

@ -1,17 +1,17 @@
#!/bin/sh
#!/usr/bin/env bash
set -Eeuo pipefail
set -x
set -euxo pipefail
DOCKER_ROOT="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"
BUILD_OUTPUT=$(realpath "${DOCKER_ROOT}"/../..)
DOCKER_ROOT=$(realpath $(dirname ${BASH_SOURCE[0]}))
BUILD_OUTPUT=$(realpath ${DOCKER_ROOT}/../..)
echo Docker root: "${DOCKER_ROOT}"
echo Build output: "${BUILD_OUTPUT}"
echo Docker root: $DOCKER_ROOT
echo Build output: $BUILD_OUTPUT
cd ${DOCKER_ROOT}
cd "${DOCKER_ROOT}"
## eg: CMAKE_PROJECT_VERSION:STATIC=7.0.0
FDB_VERSION=$(grep CMAKE_PROJECT_VERSION\: ${BUILD_OUTPUT}/CMakeCache.txt | cut -d '=' -f 2)
FDB_VERSION=$(grep CMAKE_PROJECT_VERSION: "${BUILD_OUTPUT}"/CMakeCache.txt | cut -d '=' -f 2)
# Options (passed via environment variables)
@ -20,18 +20,18 @@ FDB_VERSION=$(grep CMAKE_PROJECT_VERSION\: ${BUILD_OUTPUT}/CMakeCache.txt | cut
TAG=${TAG:-${FDB_VERSION}-${OKTETO_NAME}}
ECR=${ECR:-112664522426.dkr.ecr.us-west-2.amazonaws.com}
echo Building with tag ${TAG}
echo Building with tag "${TAG}"
# Login to ECR
# TODO: Move this to a common place instead of repeatedly copy-pasting it.
aws ecr get-login-password | docker login --username AWS --password-stdin ${ECR}
aws ecr get-login-password | docker login --username AWS --password-stdin "${ECR}"
docker pull ${ECR}/amazonlinux:2.0.20210326.0
docker tag ${ECR}/amazonlinux:2.0.20210326.0 amazonlinux:2.0.20210326.0
docker pull "${ECR}"/amazonlinux:2.0.20210326.0
docker tag "${ECR}"/amazonlinux:2.0.20210326.0 amazonlinux:2.0.20210326.0
#derived variables
# derived variables
IMAGE=foundationdb/foundationdb:${TAG}
SIDECAR=foundationdb/foundationdb-kubernetes-sidecar:${TAG}-1
STRIPPED=${STRIPPED:-false}
@ -41,20 +41,22 @@ STRIPPED=${STRIPPED:-false}
if $STRIPPED; then
rsync -av --delete --exclude=*.xml ${BUILD_OUTPUT}/packages/bin .
rsync -av --delete --exclude=*.a --exclude=*.xml ${BUILD_OUTPUT}/packages/lib .
rsync -av --delete --exclude=*.xml "${BUILD_OUTPUT}"/packages/bin .
rsync -av --delete --exclude=*.a --exclude=*.xml "${BUILD_OUTPUT}"/packages/lib .
else
rsync -av --delete --exclude=*.xml ${BUILD_OUTPUT}/bin .
rsync -av --delete --exclude=*.a --exclude=*.xml ${BUILD_OUTPUT}/lib .
rsync -av --delete --exclude=*.xml "${BUILD_OUTPUT}"/bin .
rsync -av --delete --exclude=*.a --exclude=*.xml "${BUILD_OUTPUT}"/lib .
fi
BUILD_ARGS="--build-arg FDB_VERSION=$FDB_VERSION"
docker build ${BUILD_ARGS} -t ${IMAGE} --target foundationdb -f Dockerfile.eks .
docker build ${BUILD_ARGS} -t ${SIDECAR} --target sidecar -f Dockerfile.eks .
docker tag ${IMAGE} ${ECR}/${IMAGE}
docker tag ${SIDECAR} ${ECR}/${SIDECAR}
docker push ${ECR}/${IMAGE}
docker push ${ECR}/${SIDECAR}
docker build ${BUILD_ARGS} -t "${IMAGE}" --target foundationdb -f Dockerfile.eks .
docker build ${BUILD_ARGS} -t "${SIDECAR}" --target sidecar -f Dockerfile.eks .
docker tag "${IMAGE}" "${ECR}"/"${IMAGE}"
docker tag "${SIDECAR}" "${ECR}"/"${SIDECAR}"
docker push "${ECR}"/"${IMAGE}"
docker push "${ECR}"/"${SIDECAR}"

View File

@ -1,17 +1,17 @@
#!/bin/bash
#!/usr/bin/env bash
set -Eeuo pipefail
set -x
set -euxo pipefail
DOCKER_ROOT="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"
BUILD_OUTPUT=$(realpath "${DOCKER_ROOT}"/../..)
DOCKER_ROOT=$(realpath $(dirname ${BASH_SOURCE[0]}))
BUILD_OUTPUT=$(realpath ${DOCKER_ROOT}/../..)
echo Docker root: "${DOCKER_ROOT}"
echo Build output: "${BUILD_OUTPUT}"
echo Docker root: $DOCKER_ROOT
echo Build output: $BUILD_OUTPUT
cd ${DOCKER_ROOT}
cd "${DOCKER_ROOT}"
## eg: CMAKE_PROJECT_VERSION:STATIC=7.0.0
FDB_VERSION=$(grep CMAKE_PROJECT_VERSION\: ${BUILD_OUTPUT}/CMakeCache.txt | cut -d '=' -f 2)
FDB_VERSION=$(grep CMAKE_PROJECT_VERSION: "${BUILD_OUTPUT}"/CMakeCache.txt | cut -d '=' -f 2)
# Options (passed via environment variables)
@ -20,43 +20,43 @@ FDB_VERSION=$(grep CMAKE_PROJECT_VERSION\: ${BUILD_OUTPUT}/CMakeCache.txt | cut
TAG=${TAG:-${FDB_VERSION}-${OKTETO_NAME}}
ECR=${ECR:-112664522426.dkr.ecr.us-west-2.amazonaws.com}
echo Building with tag ${TAG}
echo Building with tag "${TAG}"
# Login to ECR
# TODO: Move this to a common place instead of repeatedly copy-pasting it.
aws ecr get-login-password | docker login --username AWS --password-stdin ${ECR}
aws ecr get-login-password | docker login --username AWS --password-stdin "${ECR}"
docker pull ${ECR}/ubuntu:18.04
docker tag ${ECR}/ubuntu:18.04 ubuntu:18.04
docker pull ${ECR}/python:3.9-slim
docker tag ${ECR}/python:3.9-slim python:3.9-slim
docker pull "${ECR}"/ubuntu:18.04
docker tag "${ECR}"/ubuntu:18.04 ubuntu:18.04
docker pull "${ECR}"/python:3.9-slim
docker tag "${ECR}"/python:3.9-slim python:3.9-slim
# derived variables
IMAGE=foundationdb/foundationdb:${TAG}
SIDECAR=foundationdb/foundationdb-kubernetes-sidecar:${TAG}-1
IMAGE=foundationdb/foundationdb:"${TAG}"
SIDECAR=foundationdb/foundationdb-kubernetes-sidecar:"${TAG}"-1
STRIPPED=${STRIPPED:-false}
WEBSITE_BIN_DIR=website/downloads/${FDB_VERSION}/linux/
TARBALL=${WEBSITE_BIN_DIR}/fdb_${FDB_VERSION}.tar.gz
mkdir -p ${WEBSITE_BIN_DIR}
WEBSITE_BIN_DIR=website/downloads/"${FDB_VERSION}"/linux
TARBALL=${WEBSITE_BIN_DIR}/fdb_"${FDB_VERSION}".tar.gz
mkdir -p "${WEBSITE_BIN_DIR}"
if $STRIPPED; then
tar -C ~/build_output/packages/ -zcvf ${TARBALL} bin lib
cp ~/build_output/packages/lib/libfdb_c.so ${WEBSITE_BIN_DIR}/libfdb_c_${FDB_VERSION}.so
tar -C ~/build_output/packages/ -zcvf "${TARBALL}" bin lib
cp ~/build_output/packages/lib/libfdb_c.so "${WEBSITE_BIN_DIR}"/libfdb_c_"${FDB_VERSION}".so
else
tar -C ~/build_output/ -zcvf ${TARBALL} bin lib
cp ~/build_output/lib/libfdb_c.so ${WEBSITE_BIN_DIR}/libfdb_c_${FDB_VERSION}.so
tar -C ~/build_output/ -zcvf "${TARBALL}" bin lib
cp ~/build_output/lib/libfdb_c.so "${WEBSITE_BIN_DIR}"/libfdb_c_"${FDB_VERSION}".so
fi
BUILD_ARGS="--build-arg FDB_WEBSITE=file:///mnt/website "
BUILD_ARGS+="--build-arg FDB_VERSION=$FDB_VERSION "
BUILD_ARGS+="--build-arg FDB_ADDITIONAL_VERSIONS=$FDB_VERSION"
BUILD_ARGS="--build-arg FDB_VERSION=${FDB_VERSION}"
BUILD_ARGS+=" --build-arg FDB_WEBSITE=file:///mnt/website"
BUILD_ARGS+=" --build-arg FDB_ADDITIONAL_VERSIONS=${FDB_VERSION}"
docker build -t ${IMAGE} ${BUILD_ARGS} -f release/Dockerfile .
docker build -t ${SIDECAR} ${BUILD_ARGS} -f sidecar/Dockerfile .
docker build ${BUILD_ARGS} -t "${IMAGE}" -f release/Dockerfile .
docker build ${BUILD_ARGS} -t "${SIDECAR}" -f sidecar/Dockerfile .
docker tag ${IMAGE} ${ECR}/${IMAGE}
docker tag ${SIDECAR} ${ECR}/${SIDECAR}
docker tag "${IMAGE}" "${ECR}"/"${IMAGE}"
docker tag "${SIDECAR}" "${ECR}"/"${SIDECAR}"
docker push ${ECR}/${IMAGE}
docker push ${ECR}/${SIDECAR}
docker push "${ECR}"/"${IMAGE}"
docker push "${ECR}"/"${SIDECAR}"

View File

@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Build and push the foundationdb/ycsb benchmark image.
#
# Prerequisites: a completed FDB build two directories up (CMakeCache.txt and
# the fdb-java package jar), plus aws CLI, docker, git, and mvn on PATH.
# Options are passed via environment variables: TAG (image tag), ECR (registry).
set -Eeuo pipefail
set -x
DOCKER_ROOT="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"
BUILD_OUTPUT=$(realpath "${DOCKER_ROOT}"/../..)

echo Docker root: "${DOCKER_ROOT}"
echo Build output: "${BUILD_OUTPUT}"

cd "${DOCKER_ROOT}"

## eg: CMAKE_PROJECT_VERSION:STATIC=7.0.0
FDB_VERSION=$(grep CMAKE_PROJECT_VERSION: "${BUILD_OUTPUT}"/CMakeCache.txt | cut -d '=' -f 2)

# Options (passed via environment variables)

# Feel free to customize the image tag.
# TODO: add a mechanism to set TAG=FDB_VERSION when we're building public releases.
TAG=${TAG:-${FDB_VERSION}-${OKTETO_NAME}}
ECR=${ECR:-112664522426.dkr.ecr.us-west-2.amazonaws.com}

echo Building with tag "${TAG}"

# Login to ECR
# TODO: Move this to a common place instead of repeatedly copy-pasting it.
aws ecr get-login-password | docker login --username AWS --password-stdin "${ECR}"

# Pull the base image through ECR so the build does not depend on Docker Hub.
docker pull "${ECR}"/openjdk:17-slim
docker tag "${ECR}"/openjdk:17-slim openjdk:17-slim

# derived variables
IMAGE=foundationdb/ycsb:"${TAG}"

# Install the locally built fdb-java prerelease jar into the local m2 repo,
# then compile YCSB's foundationdb binding against it.
mvn install:install-file \
	-Dfile="${BUILD_OUTPUT}"/packages/fdb-java-"${FDB_VERSION}"-PRERELEASE.jar \
	-DgroupId=org.foundationdb \
	-DartifactId=fdb-java \
	-Dversion="${FDB_VERSION}"-PRERELEASE \
	-Dpackaging=jar \
	-DgeneratePom=true
mkdir "${DOCKER_ROOT}"/YCSB && cd "${DOCKER_ROOT}"/YCSB
git clone https://github.com/FoundationDB/YCSB.git .
# Rewrite the foundationdb.version in pom.xml to the prerelease we just
# installed. Dots are backslash-escaped so they match literal '.' in the BRE,
# not any character.
sed -i "s/<foundationdb\.version>[0-9]\+\.[0-9]\+\.[0-9]\+<\/foundationdb\.version>/<foundationdb.version>${FDB_VERSION}-PRERELEASE<\/foundationdb.version>/g" pom.xml
mvn -pl site.ycsb:foundationdb-binding -am clean package
mkdir -p core/target/dependency
# Copy runtime dependencies YCSB loads from core/target/dependency at run time.
# shellcheck disable=SC2046
cp $(find ~/.m2/ -name jax\*.jar) core/target/dependency/
# shellcheck disable=SC2046
cp $(find ~/.m2/ -name htrace\*.jar) core/target/dependency/
# shellcheck disable=SC2046
cp $(find ~/.m2/ -name HdrHistogram\*.jar) core/target/dependency/
# Drop the git metadata so it is not baked into the image build context.
rm -rf .git && cd ..
docker build -t "${IMAGE}" -f ycsb/Dockerfile .
docker tag "${IMAGE}" "${ECR}"/"${IMAGE}"
docker push "${ECR}"/"${IMAGE}"

Some files were not shown because too many files have changed in this diff Show More