Merge branch 'main' of github.com:apple/foundationdb into jfu-metacluster-rename

This commit is contained in:
Jon Fu 2022-08-16 16:14:28 -07:00
commit 2836852a7f
73 changed files with 2044 additions and 657 deletions

View File

@ -239,6 +239,10 @@ fdb_error_t fdb_future_get_version_v619(FDBFuture* f, int64_t* out_version) {
CATCH_AND_RETURN(*out_version = TSAV(Version, f)->get(););
}
extern "C" DLLEXPORT fdb_error_t fdb_future_get_bool(FDBFuture* f, fdb_bool_t* out_value) {
CATCH_AND_RETURN(*out_value = TSAV(bool, f)->get(););
}
extern "C" DLLEXPORT fdb_error_t fdb_future_get_int64(FDBFuture* f, int64_t* out_value) {
CATCH_AND_RETURN(*out_value = TSAV(int64_t, f)->get(););
}
@ -494,6 +498,54 @@ extern "C" DLLEXPORT FDBFuture* fdb_database_wait_purge_granules_complete(FDBDat
FDBFuture*)(DB(db)->waitPurgeGranulesComplete(StringRef(purge_key_name, purge_key_name_length)).extractPtr());
}
extern "C" DLLEXPORT FDBFuture* fdb_database_blobbify_range(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length) {
return (FDBFuture*)(DB(db)
->blobbifyRange(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
StringRef(end_key_name, end_key_name_length)))
.extractPtr());
}
extern "C" DLLEXPORT FDBFuture* fdb_database_unblobbify_range(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length) {
return (FDBFuture*)(DB(db)
->unblobbifyRange(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
StringRef(end_key_name, end_key_name_length)))
.extractPtr());
}
extern "C" DLLEXPORT FDBFuture* fdb_database_list_blobbified_ranges(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int rangeLimit) {
return (FDBFuture*)(DB(db)
->listBlobbifiedRanges(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
StringRef(end_key_name, end_key_name_length)),
rangeLimit)
.extractPtr());
}
extern "C" DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_verify_blob_range(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int64_t version) {
return (FDBFuture*)(DB(db)
->verifyBlobRange(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
StringRef(end_key_name, end_key_name_length)),
version)
.extractPtr());
}
extern "C" DLLEXPORT fdb_error_t fdb_tenant_create_transaction(FDBTenant* tenant, FDBTransaction** out_transaction) {
CATCH_AND_RETURN(*out_transaction = (FDBTransaction*)TENANT(tenant)->createTransaction().extractPtr(););
}
@ -856,11 +908,12 @@ extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_blob_granule_ranges(FDBTrans
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length) {
int end_key_name_length,
int rangeLimit) {
RETURN_FUTURE_ON_ERROR(
Standalone<VectorRef<KeyRangeRef>>,
KeyRangeRef range(KeyRef(begin_key_name, begin_key_name_length), KeyRef(end_key_name, end_key_name_length));
return (FDBFuture*)(TXN(tr)->getBlobGranuleRanges(range).extractPtr()););
return (FDBFuture*)(TXN(tr)->getBlobGranuleRanges(range, rangeLimit).extractPtr()););
}
extern "C" DLLEXPORT FDBResult* fdb_transaction_read_blob_granules(FDBTransaction* tr,

View File

@ -227,6 +227,8 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_set_callback(FDBFuture* f,
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_error(FDBFuture* f);
#endif
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_bool(FDBFuture* f, fdb_bool_t* out);
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_int64(FDBFuture* f, int64_t* out);
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_uint64(FDBFuture* f, uint64_t* out);
@ -321,6 +323,32 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_wait_purge_granules_complet
uint8_t const* purge_key_name,
int purge_key_name_length);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_blobbify_range(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_unblobbify_range(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_list_blobbified_ranges(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int rangeLimit);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_verify_blob_range(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int64_t version);
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_tenant_create_transaction(FDBTenant* tenant,
FDBTransaction** out_transaction);
@ -479,7 +507,8 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_blob_granule_ranges(
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length);
int end_key_name_length,
int rangeLimit);
/* LatestVersion (-2) for readVersion means get read version from transaction
Separated out as optional because BG reads can support longer-lived reads than normal FDB transactions */

View File

@ -180,7 +180,7 @@ private:
}
execTransaction(
[begin, end, results](auto ctx) {
fdb::Future f = ctx->tx().getBlobGranuleRanges(begin, end).eraseType();
fdb::Future f = ctx->tx().getBlobGranuleRanges(begin, end, 1000).eraseType();
ctx->continueAfter(
f,
[ctx, f, results]() {

View File

@ -559,9 +559,9 @@ public:
reverse);
}
TypedFuture<future_var::KeyRangeRefArray> getBlobGranuleRanges(KeyRef begin, KeyRef end) {
TypedFuture<future_var::KeyRangeRefArray> getBlobGranuleRanges(KeyRef begin, KeyRef end, int rangeLimit) {
return native::fdb_transaction_get_blob_granule_ranges(
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end));
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end), rangeLimit);
}
Result readBlobGranules(KeyRef begin,

View File

@ -356,9 +356,15 @@ fdb_error_t Transaction::add_conflict_range(std::string_view begin_key,
tr_, (const uint8_t*)begin_key.data(), begin_key.size(), (const uint8_t*)end_key.data(), end_key.size(), type);
}
KeyRangeArrayFuture Transaction::get_blob_granule_ranges(std::string_view begin_key, std::string_view end_key) {
return KeyRangeArrayFuture(fdb_transaction_get_blob_granule_ranges(
tr_, (const uint8_t*)begin_key.data(), begin_key.size(), (const uint8_t*)end_key.data(), end_key.size()));
KeyRangeArrayFuture Transaction::get_blob_granule_ranges(std::string_view begin_key,
std::string_view end_key,
int rangeLimit) {
return KeyRangeArrayFuture(fdb_transaction_get_blob_granule_ranges(tr_,
(const uint8_t*)begin_key.data(),
begin_key.size(),
(const uint8_t*)end_key.data(),
end_key.size(),
rangeLimit));
}
KeyValueArrayResult Transaction::read_blob_granules(std::string_view begin_key,
std::string_view end_key,

View File

@ -348,7 +348,7 @@ public:
// Wrapper around fdb_transaction_add_conflict_range.
fdb_error_t add_conflict_range(std::string_view begin_key, std::string_view end_key, FDBConflictRangeType type);
KeyRangeArrayFuture get_blob_granule_ranges(std::string_view begin_key, std::string_view end_key);
KeyRangeArrayFuture get_blob_granule_ranges(std::string_view begin_key, std::string_view end_key, int rangeLimit);
KeyValueArrayResult read_blob_granules(std::string_view begin_key,
std::string_view end_key,
int64_t beginVersion,

View File

@ -2853,7 +2853,7 @@ TEST_CASE("Blob Granule Functions") {
// test ranges
while (1) {
fdb::KeyRangeArrayFuture f = tr.get_blob_granule_ranges(key("bg"), key("bh"));
fdb::KeyRangeArrayFuture f = tr.get_blob_granule_ranges(key("bg"), key("bh"), 1000);
fdb_error_t err = wait_future(f);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);

View File

@ -34,9 +34,11 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/FDBDatabase.java
src/main/com/apple/foundationdb/FDBTenant.java
src/main/com/apple/foundationdb/FDBTransaction.java
src/main/com/apple/foundationdb/FutureBool.java
src/main/com/apple/foundationdb/FutureInt64.java
src/main/com/apple/foundationdb/FutureKey.java
src/main/com/apple/foundationdb/FutureKeyArray.java
src/main/com/apple/foundationdb/FutureKeyRangeArray.java
src/main/com/apple/foundationdb/FutureResult.java
src/main/com/apple/foundationdb/FutureResults.java
src/main/com/apple/foundationdb/FutureMappedResults.java
@ -56,6 +58,7 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/RangeQuery.java
src/main/com/apple/foundationdb/MappedRangeQuery.java
src/main/com/apple/foundationdb/KeyArrayResult.java
src/main/com/apple/foundationdb/KeyRangeArrayResult.java
src/main/com/apple/foundationdb/RangeResult.java
src/main/com/apple/foundationdb/MappedRangeResult.java
src/main/com/apple/foundationdb/RangeResultInfo.java

View File

@ -25,9 +25,11 @@
#include "com_apple_foundationdb_FDB.h"
#include "com_apple_foundationdb_FDBDatabase.h"
#include "com_apple_foundationdb_FDBTransaction.h"
#include "com_apple_foundationdb_FutureBool.h"
#include "com_apple_foundationdb_FutureInt64.h"
#include "com_apple_foundationdb_FutureKey.h"
#include "com_apple_foundationdb_FutureKeyArray.h"
#include "com_apple_foundationdb_FutureKeyRangeArray.h"
#include "com_apple_foundationdb_FutureResult.h"
#include "com_apple_foundationdb_FutureResults.h"
#include "com_apple_foundationdb_FutureStrings.h"
@ -55,7 +57,11 @@ static jclass mapped_range_result_class;
static jclass mapped_key_value_class;
static jclass string_class;
static jclass key_array_result_class;
static jclass keyrange_class;
static jclass keyrange_array_result_class;
static jmethodID key_array_result_init;
static jmethodID keyrange_init;
static jmethodID keyrange_array_result_init;
static jmethodID range_result_init;
static jmethodID mapped_range_result_init;
static jmethodID mapped_key_value_from_bytes;
@ -278,6 +284,23 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_NativeFuture_Future_1releaseM
fdb_future_release_memory(var);
}
JNIEXPORT jboolean JNICALL Java_com_apple_foundationdb_FutureBool_FutureBool_1get(JNIEnv* jenv, jobject, jlong future) {
if (!future) {
throwParamNotNull(jenv);
return 0;
}
FDBFuture* f = (FDBFuture*)future;
fdb_bool_t value = false;
fdb_error_t err = fdb_future_get_bool(f, &value);
if (err) {
safeThrow(jenv, getThrowable(jenv, err));
return 0;
}
return (jboolean)value;
}
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FutureInt64_FutureInt64_1get(JNIEnv* jenv, jobject, jlong future) {
if (!future) {
throwParamNotNull(jenv);
@ -407,6 +430,61 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureKeyArray_FutureKeyAr
return result;
}
JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureKeyRangeArray_FutureKeyRangeArray_1get(JNIEnv* jenv,
jobject,
jlong future) {
if (!future) {
throwParamNotNull(jenv);
return JNI_NULL;
}
FDBFuture* f = (FDBFuture*)future;
const FDBKeyRange* fdbKr;
int count;
fdb_error_t err = fdb_future_get_keyrange_array(f, &fdbKr, &count);
if (err) {
safeThrow(jenv, getThrowable(jenv, err));
return JNI_NULL;
}
jobjectArray kr_values = jenv->NewObjectArray(count, keyrange_class, NULL);
if (!kr_values) {
if (!jenv->ExceptionOccurred())
throwOutOfMem(jenv);
return JNI_NULL;
}
for (int i = 0; i < count; i++) {
jbyteArray beginArr = jenv->NewByteArray(fdbKr[i].begin_key_length);
if (!beginArr) {
if (!jenv->ExceptionOccurred())
throwOutOfMem(jenv);
return JNI_NULL;
}
jbyteArray endArr = jenv->NewByteArray(fdbKr[i].end_key_length);
if (!endArr) {
if (!jenv->ExceptionOccurred())
throwOutOfMem(jenv);
return JNI_NULL;
}
jenv->SetByteArrayRegion(beginArr, 0, fdbKr[i].begin_key_length, (const jbyte*)fdbKr[i].begin_key);
jenv->SetByteArrayRegion(endArr, 0, fdbKr[i].end_key_length, (const jbyte*)fdbKr[i].end_key);
jobject kr = jenv->NewObject(keyrange_class, keyrange_init, beginArr, endArr);
if (jenv->ExceptionOccurred())
return JNI_NULL;
jenv->SetObjectArrayElement(kr_values, i, kr);
if (jenv->ExceptionOccurred())
return JNI_NULL;
}
jobject krarr = jenv->NewObject(keyrange_array_result_class, keyrange_array_result_init, kr_values);
if (jenv->ExceptionOccurred())
return JNI_NULL;
return krarr;
}
// SOMEDAY: explore doing this more efficiently with Direct ByteBuffers
JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1get(JNIEnv* jenv,
jobject,
@ -830,6 +908,142 @@ Java_com_apple_foundationdb_FDBDatabase_Database_1waitPurgeGranulesComplete(JNIE
return (jlong)f;
}
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1blobbifyRange(JNIEnv* jenv,
jobject,
jlong dbPtr,
jbyteArray beginKeyBytes,
jbyteArray endKeyBytes) {
if (!dbPtr || !beginKeyBytes || !endKeyBytes) {
throwParamNotNull(jenv);
return 0;
}
FDBDatabase* database = (FDBDatabase*)dbPtr;
uint8_t* beginKeyArr = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
if (!beginKeyArr) {
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
uint8_t* endKeyArr = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
if (!endKeyArr) {
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
FDBFuture* f = fdb_database_blobbify_range(
database, beginKeyArr, jenv->GetArrayLength(beginKeyBytes), endKeyArr, jenv->GetArrayLength(endKeyBytes));
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKeyArr, JNI_ABORT);
return (jlong)f;
}
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1unblobbifyRange(JNIEnv* jenv,
jobject,
jlong dbPtr,
jbyteArray beginKeyBytes,
jbyteArray endKeyBytes) {
if (!dbPtr || !beginKeyBytes || !endKeyBytes) {
throwParamNotNull(jenv);
return 0;
}
FDBDatabase* database = (FDBDatabase*)dbPtr;
uint8_t* beginKeyArr = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
if (!beginKeyArr) {
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
uint8_t* endKeyArr = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
if (!endKeyArr) {
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
FDBFuture* f = fdb_database_unblobbify_range(
database, beginKeyArr, jenv->GetArrayLength(beginKeyBytes), endKeyArr, jenv->GetArrayLength(endKeyBytes));
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKeyArr, JNI_ABORT);
return (jlong)f;
}
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1listBlobbifiedRanges(JNIEnv* jenv,
jobject,
jlong dbPtr,
jbyteArray beginKeyBytes,
jbyteArray endKeyBytes,
jint rangeLimit) {
if (!dbPtr || !beginKeyBytes || !endKeyBytes) {
throwParamNotNull(jenv);
return 0;
}
FDBDatabase* tr = (FDBDatabase*)dbPtr;
uint8_t* startKey = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
if (!startKey) {
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
uint8_t* endKey = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
if (!endKey) {
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
FDBFuture* f = fdb_database_list_blobbified_ranges(
tr, startKey, jenv->GetArrayLength(beginKeyBytes), endKey, jenv->GetArrayLength(endKeyBytes), rangeLimit);
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKey, JNI_ABORT);
return (jlong)f;
}
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBDatabase_Database_1verifyBlobRange(JNIEnv* jenv,
jobject,
jlong dbPtr,
jbyteArray beginKeyBytes,
jbyteArray endKeyBytes,
jlong version) {
if (!dbPtr || !beginKeyBytes || !endKeyBytes) {
throwParamNotNull(jenv);
return 0;
}
FDBDatabase* tr = (FDBDatabase*)dbPtr;
uint8_t* startKey = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
if (!startKey) {
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
uint8_t* endKey = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
if (!endKey) {
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
FDBFuture* f = fdb_database_verify_blob_range(
tr, startKey, jenv->GetArrayLength(beginKeyBytes), endKey, jenv->GetArrayLength(endKeyBytes), version);
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKey, JNI_ABORT);
return (jlong)f;
}
JNIEXPORT jboolean JNICALL Java_com_apple_foundationdb_FDB_Error_1predicate(JNIEnv* jenv,
jobject,
jint predicate,
@ -1307,6 +1521,41 @@ Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeSplitPoints(JNIE
return (jlong)f;
}
JNIEXPORT jlong JNICALL
Java_com_apple_foundationdb_FDBTransaction_Transaction_1getBlobGranuleRanges(JNIEnv* jenv,
jobject,
jlong tPtr,
jbyteArray beginKeyBytes,
jbyteArray endKeyBytes,
jint rowLimit) {
if (!tPtr || !beginKeyBytes || !endKeyBytes || !rowLimit) {
throwParamNotNull(jenv);
return 0;
}
FDBTransaction* tr = (FDBTransaction*)tPtr;
uint8_t* startKey = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
if (!startKey) {
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
uint8_t* endKey = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
if (!endKey) {
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return 0;
}
FDBFuture* f = fdb_transaction_get_blob_granule_ranges(
tr, startKey, jenv->GetArrayLength(beginKeyBytes), endKey, jenv->GetArrayLength(endKeyBytes), rowLimit);
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKey, JNI_ABORT);
return (jlong)f;
}
JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1set(JNIEnv* jenv,
jobject,
jlong tPtr,
@ -1746,6 +1995,15 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) {
key_array_result_init = env->GetMethodID(local_key_array_result_class, "<init>", "([B[I)V");
key_array_result_class = (jclass)(env)->NewGlobalRef(local_key_array_result_class);
jclass local_keyrange_class = env->FindClass("com/apple/foundationdb/Range");
keyrange_init = env->GetMethodID(local_keyrange_class, "<init>", "([B[B)V");
keyrange_class = (jclass)(env)->NewGlobalRef(local_keyrange_class);
jclass local_keyrange_array_result_class = env->FindClass("com/apple/foundationdb/KeyRangeArrayResult");
keyrange_array_result_init =
env->GetMethodID(local_keyrange_array_result_class, "<init>", "([Lcom/apple/foundationdb/Range;)V");
keyrange_array_result_class = (jclass)(env)->NewGlobalRef(local_keyrange_array_result_class);
jclass local_range_result_summary_class = env->FindClass("com/apple/foundationdb/RangeResultSummary");
range_result_summary_init = env->GetMethodID(local_range_result_summary_class, "<init>", "([BIZ)V");
range_result_summary_class = (jclass)(env)->NewGlobalRef(local_range_result_summary_class);
@ -1770,6 +2028,12 @@ void JNI_OnUnload(JavaVM* vm, void* reserved) {
if (range_result_class != JNI_NULL) {
env->DeleteGlobalRef(range_result_class);
}
if (keyrange_array_result_class != JNI_NULL) {
env->DeleteGlobalRef(keyrange_array_result_class);
}
if (keyrange_class != JNI_NULL) {
env->DeleteGlobalRef(keyrange_class);
}
if (mapped_range_result_class != JNI_NULL) {
env->DeleteGlobalRef(mapped_range_result_class);
}

View File

@ -161,6 +161,20 @@ public interface Database extends AutoCloseable, TransactionContext {
*/
double getMainThreadBusyness();
/**
* Runs {@link #purgeBlobGranules(byte[], byte[], long, boolean, Executor)} on the default executor.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @param purgeVersion version to purge at
* @param force if true, delete all data; if false, keep data at versions >= purgeVersion
*
* @return the key to watch for purge complete
*/
default CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force) {
return purgeBlobGranules(beginKey, endKey, purgeVersion, force, getExecutor());
}
/**
* Queues a purge of blob granules for the specified key range, at the specified version.
*
@ -168,17 +182,126 @@ public interface Database extends AutoCloseable, TransactionContext {
* @param endKey end of the key range
* @param purgeVersion version to purge at
* @param force if true, delete all data; if false, keep data at versions >= purgeVersion
* @param e the {@link Executor} to use for asynchronous callbacks
* @return the key to watch for purge complete
*/
CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force, Executor e);
/**
* Wait for a previous call to purgeBlobGranules to complete
* Runs {@link #waitPurgeGranulesComplete(byte[], Executor)} on the default executor.
*
* @param purgeKey key to watch
*/
default CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey) {
return waitPurgeGranulesComplete(purgeKey, getExecutor());
}
/**
* Wait for a previous call to purgeBlobGranules to complete.
*
* @param purgeKey key to watch
* @param e the {@link Executor} to use for asynchronous callbacks
*/
CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey, Executor e);
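A minimal sketch of how the two purge methods chain together (hypothetical helper; imports from java.util.concurrent and com.apple.foundationdb elided; db, begin, end, and purgeVersion are caller-supplied assumptions):

// Purge granules for [begin, end) at purgeVersion, then wait on the returned watch key.
static CompletableFuture<Void> purgeAndWait(Database db, byte[] begin, byte[] end, long purgeVersion) {
    return db.purgeBlobGranules(begin, end, purgeVersion, /* force */ false)
            .thenCompose(db::waitPurgeGranulesComplete);
}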
/**
* Runs {@link #blobbifyRange(byte[], byte[], Executor)} on the default executor.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @return a future with a boolean indicating whether the range was successfully blobbified
*/
default CompletableFuture<Boolean> blobbifyRange(byte[] beginKey, byte[] endKey) {
return blobbifyRange(beginKey, endKey, getExecutor());
}
/**
* Sets a range to be blobbified in the database. Must be a completely unblobbified range.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @param e the {@link Executor} to use for asynchronous callbacks
* @return a future with a boolean indicating whether the range was successfully blobbified
*/
CompletableFuture<Boolean> blobbifyRange(byte[] beginKey, byte[] endKey, Executor e);
/**
* Runs {@link #unblobbifyRange(byte[], byte[], Executor)} on the default executor.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @return a future with a boolean indicating whether the range was successfully unblobbified
*/
default CompletableFuture<Boolean> unblobbifyRange(byte[] beginKey, byte[] endKey) {
return unblobbifyRange(beginKey, endKey, getExecutor());
}
/**
* Sets a range to be unblobbified in the database.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @param e the {@link Executor} to use for asynchronous callbacks
* @return a future with a boolean indicating whether the range was successfully unblobbified
*/
CompletableFuture<Boolean> unblobbifyRange(byte[] beginKey, byte[] endKey, Executor e);
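A hedged usage sketch for the blobbify pair (hypothetical helper; assumes an open Database). The boolean result needs to be checked, since blobbifyRange requires a completely unblobbified range:

// Request blobbification of [begin, end) and surface a failure to the caller.
static void blobbifyOrThrow(Database db, byte[] begin, byte[] end) {
    if (!db.blobbifyRange(begin, end).join()) {
        throw new IllegalStateException("blobbifyRange failed; the range may already be (partially) blobbified");
    }
}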
/**
* Runs {@link #listBlobbifiedRanges(byte[], byte[], int, Executor)} on the default executor.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @param rangeLimit maximum number of ranges to return
* @return a future with the list of blobbified ranges.
*/
default CompletableFuture<KeyRangeArrayResult> listBlobbifiedRanges(byte[] beginKey, byte[] endKey, int rangeLimit) {
return listBlobbifiedRanges(beginKey, endKey, rangeLimit, getExecutor());
}
/**
* Lists blobbified ranges in the database. There may be more if result.size() == rangeLimit.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @param rangeLimit maximum number of ranges to return
* @param e the {@link Executor} to use for asynchronous callbacks
* @return a future with the list of blobbified ranges.
*/
CompletableFuture<KeyRangeArrayResult> listBlobbifiedRanges(byte[] beginKey, byte[] endKey, int rangeLimit, Executor e);
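Because a result of exactly rangeLimit ranges means more may remain, listing everything takes a paging loop. A sketch under that assumption (hypothetical helper; assumes an open Database; imports from java.util elided):

// Page through all blobbified ranges in [begin, end), rangeLimit at a time.
static List<Range> allBlobbifiedRanges(Database db, byte[] begin, byte[] end, int rangeLimit) {
    List<Range> all = new ArrayList<>();
    byte[] cursor = begin;
    while (true) {
        List<Range> batch = db.listBlobbifiedRanges(cursor, end, rangeLimit).join().getKeyRanges();
        all.addAll(batch);
        if (batch.size() < rangeLimit) {
            return all; // a short batch means nothing is left to fetch
        }
        cursor = batch.get(batch.size() - 1).end; // resume just past the last returned range
    }
}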
/**
* Runs {@link #verifyBlobRange(byte[], byte[], long, Executor)} on the default executor.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @param version version to read at
*
* @return a future with the version of the last blob granule.
*/
default CompletableFuture<Long> verifyBlobRange(byte[] beginKey, byte[] endKey, long version) {
return verifyBlobRange(beginKey, endKey, version, getExecutor());
}
/**
* Checks if a blob range is blobbified.
*
* @param beginKey start of the key range
* @param endKey end of the key range
* @param version version to read at
* @param e the {@link Executor} to use for asynchronous callbacks
*
* @return a future with the version of the last blob granule.
*/
CompletableFuture<Long> verifyBlobRange(byte[] beginKey, byte[] endKey, long version, Executor e);
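A short sketch of the verify call (hypothetical helper; assumes an open Database, and assumes the latest-version sentinel -2 documented for readVersion in the C API applies to this version parameter as well):

// Returns the version of the last blob granule if [begin, end) verifies as blobbified.
static long verifyAtLatest(Database db, byte[] begin, byte[] end) {
    return db.verifyBlobRange(begin, end, -2).join();
}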
/**
* Runs a read-only transactional function against this {@code Database} with retry logic.
* {@link Function#apply(Object) apply(ReadTransaction)} will be called on the

View File

@ -201,20 +201,60 @@ class FDBDatabase extends NativeObjectWrapper implements Database, OptionConsume
}
@Override
public CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force, Executor executor) {
public CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force, Executor e) {
pointerReadLock.lock();
try {
return new FutureKey(Database_purgeBlobGranules(getPtr(), beginKey, endKey, purgeVersion, force), executor, eventKeeper);
return new FutureKey(Database_purgeBlobGranules(getPtr(), beginKey, endKey, purgeVersion, force), e, eventKeeper);
} finally {
pointerReadLock.unlock();
}
}
@Override
public CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey, Executor executor) {
public CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey, Executor e) {
pointerReadLock.lock();
try {
return new FutureVoid(Database_waitPurgeGranulesComplete(getPtr(), purgeKey), executor);
return new FutureVoid(Database_waitPurgeGranulesComplete(getPtr(), purgeKey), e);
} finally {
pointerReadLock.unlock();
}
}
@Override
public CompletableFuture<Boolean> blobbifyRange(byte[] beginKey, byte[] endKey, Executor e) {
pointerReadLock.lock();
try {
return new FutureBool(Database_blobbifyRange(getPtr(), beginKey, endKey), e);
} finally {
pointerReadLock.unlock();
}
}
@Override
public CompletableFuture<Boolean> unblobbifyRange(byte[] beginKey, byte[] endKey, Executor e) {
pointerReadLock.lock();
try {
return new FutureBool(Database_unblobbifyRange(getPtr(), beginKey, endKey), e);
} finally {
pointerReadLock.unlock();
}
}
@Override
public CompletableFuture<KeyRangeArrayResult> listBlobbifiedRanges(byte[] beginKey, byte[] endKey, int rangeLimit, Executor e) {
pointerReadLock.lock();
try {
return new FutureKeyRangeArray(Database_listBlobbifiedRanges(getPtr(), beginKey, endKey, rangeLimit), e);
} finally {
pointerReadLock.unlock();
}
}
@Override
public CompletableFuture<Long> verifyBlobRange(byte[] beginKey, byte[] endKey, long version, Executor e) {
pointerReadLock.lock();
try {
return new FutureInt64(Database_verifyBlobRange(getPtr(), beginKey, endKey, version), e);
} finally {
pointerReadLock.unlock();
}
@ -237,4 +277,8 @@ class FDBDatabase extends NativeObjectWrapper implements Database, OptionConsume
private native double Database_getMainThreadBusyness(long cPtr);
private native long Database_purgeBlobGranules(long cPtr, byte[] beginKey, byte[] endKey, long purgeVersion, boolean force);
private native long Database_waitPurgeGranulesComplete(long cPtr, byte[] purgeKey);
private native long Database_blobbifyRange(long cPtr, byte[] beginKey, byte[] endKey);
private native long Database_unblobbifyRange(long cPtr, byte[] beginKey, byte[] endKey);
private native long Database_listBlobbifiedRanges(long cPtr, byte[] beginKey, byte[] endKey, int rangeLimit);
private native long Database_verifyBlobRange(long cPtr, byte[] beginKey, byte[] endKey, long version);
}

View File

@ -97,6 +97,11 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
return FDBTransaction.this.getRangeSplitPoints(range, chunkSize);
}
@Override
public CompletableFuture<KeyRangeArrayResult> getBlobGranuleRanges(byte[] begin, byte[] end, int rowLimit) {
return FDBTransaction.this.getBlobGranuleRanges(begin, end, rowLimit);
}
@Override
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper,
int limit, int matchIndex, boolean reverse,
@ -352,6 +357,16 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
return this.getRangeSplitPoints(range.begin, range.end, chunkSize);
}
@Override
public CompletableFuture<KeyRangeArrayResult> getBlobGranuleRanges(byte[] begin, byte[] end, int rowLimit) {
pointerReadLock.lock();
try {
return new FutureKeyRangeArray(Transaction_getBlobGranuleRanges(getPtr(), begin, end, rowLimit), executor);
} finally {
pointerReadLock.unlock();
}
}
@Override
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper, int limit,
int matchIndex, boolean reverse, StreamingMode mode) {
@ -842,4 +857,5 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
private native long Transaction_getKeyLocations(long cPtr, byte[] key);
private native long Transaction_getEstimatedRangeSizeBytes(long cPtr, byte[] keyBegin, byte[] keyEnd);
private native long Transaction_getRangeSplitPoints(long cPtr, byte[] keyBegin, byte[] keyEnd, long chunkSize);
private native long Transaction_getBlobGranuleRanges(long cPtr, byte[] keyBegin, byte[] keyEnd, int rowLimit);
}

View File

@ -0,0 +1,37 @@
/*
* FutureBool.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.util.concurrent.Executor;
class FutureBool extends NativeFuture<Boolean> {
FutureBool(long cPtr, Executor executor) {
super(cPtr);
registerMarshalCallback(executor);
}
@Override
protected Boolean getIfDone_internal(long cPtr) throws FDBException {
return FutureBool_get(cPtr);
}
private native boolean FutureBool_get(long cPtr) throws FDBException;
}

View File

@ -0,0 +1,37 @@
/*
* FutureKeyRangeArray.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.util.concurrent.Executor;
class FutureKeyRangeArray extends NativeFuture<KeyRangeArrayResult> {
FutureKeyRangeArray(long cPtr, Executor executor) {
super(cPtr);
registerMarshalCallback(executor);
}
@Override
protected KeyRangeArrayResult getIfDone_internal(long cPtr) throws FDBException {
return FutureKeyRangeArray_get(cPtr);
}
private native KeyRangeArrayResult FutureKeyRangeArray_get(long cPtr) throws FDBException;
}

View File

@ -0,0 +1,36 @@
/*
* KeyRangeArrayResult.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.util.Arrays;
import java.util.List;
public class KeyRangeArrayResult {
final List<Range> keyRanges;
public KeyRangeArrayResult(Range[] keyRangeArr) {
this.keyRanges = Arrays.asList(keyRangeArr);
}
public List<Range> getKeyRanges() {
return keyRanges;
}
}

View File

@ -513,6 +513,17 @@ public interface ReadTransaction extends ReadTransactionContext {
*/
CompletableFuture<KeyArrayResult> getRangeSplitPoints(Range range, long chunkSize);
/**
* Gets the blob granule ranges for a given region.
* Results are returned in batches of at most rowLimit ranges; to continue, call again with begin
* advanced past the last range returned.
*
* @param begin beginning of the range (inclusive)
* @param end end of the range (exclusive)
* @param rowLimit maximum number of ranges to return in one batch
* @return the blob granule ranges in the given range; may be incomplete if rowLimit was reached
*/
CompletableFuture<KeyRangeArrayResult> getBlobGranuleRanges(byte[] begin, byte[] end, int rowLimit);
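Since results arrive in batches, callers are expected to loop, moving begin up each time; a sketch under those assumptions (hypothetical helper; imports from java.util elided):

// Collect every granule range in [begin, end), one batch of at most rowLimit at a time.
static List<Range> allGranuleRanges(ReadTransaction tr, byte[] begin, byte[] end, int rowLimit) {
    List<Range> all = new ArrayList<>();
    byte[] cursor = begin;
    while (true) {
        List<Range> batch = tr.getBlobGranuleRanges(cursor, end, rowLimit).join().getKeyRanges();
        all.addAll(batch);
        if (batch.size() < rowLimit) {
            return all;
        }
        cursor = batch.get(batch.size() - 1).end; // the next batch starts after the last range returned
    }
}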
/**
* Returns a set of options that can be set on a {@code Transaction}

View File

@ -302,6 +302,7 @@ namespace SummarizeTest
uniqueFileSet.Add(file.Substring(0, file.LastIndexOf("-"))); // all restarting tests end with -1.txt or -2.txt
}
uniqueFiles = uniqueFileSet.ToArray();
Array.Sort(uniqueFiles);
testFile = random.Choice(uniqueFiles);
// The on-disk format changed in 4.0.0, and 5.x can't load files from 3.x.
string oldBinaryVersionLowerBound = "4.0.0";
@ -334,8 +335,9 @@ namespace SummarizeTest
// thus, by definition, if "until_" appears, we do not want to run with the current binary version
oldBinaries = oldBinaries.Concat(currentBinary);
}
List<string> oldBinariesList = oldBinaries.ToList<string>();
if (oldBinariesList.Count == 0) {
string[] oldBinariesList = oldBinaries.ToArray<string>();
Array.Sort(oldBinariesList);
if (oldBinariesList.Length == 0) {
// In theory, restarting tests are named to have at least one old binary version to run
// But if none of the provided old binaries fall in the range, we just skip the test
Console.WriteLine("No available old binary version from {0} to {1}", oldBinaryVersionLowerBound, oldBinaryVersionUpperBound);
@ -347,6 +349,7 @@ namespace SummarizeTest
else
{
uniqueFiles = Directory.GetFiles(testDir);
Array.Sort(uniqueFiles);
testFile = random.Choice(uniqueFiles);
}
}
@ -718,7 +721,7 @@ namespace SummarizeTest
process.Refresh();
if (process.HasExited)
return;
long mem = process.PrivateMemorySize64;
long mem = process.PagedMemorySize64;
MaxMem = Math.Max(MaxMem, mem);
//Console.WriteLine(string.Format("Process used {0} bytes", MaxMem));
Thread.Sleep(1000);

View File

@ -284,6 +284,12 @@ class ErrorCommitInfo(BaseInfo):
if protocol_version >= PROTOCOL_VERSION_6_3:
    self.report_conflicting_keys = bb.get_bool()
if protocol_version >= PROTOCOL_VERSION_7_1:
    # Fields added in protocol 7.1; values are read to advance the buffer position.
    lock_aware = bb.get_bool()
    if bb.get_bool():
        spanId = bb.get_bytes(16)
class UnsupportedProtocolVersionError(Exception):
def __init__(self, protocol_version):
super().__init__("Unsupported protocol version 0x%0.2X" % protocol_version)

View File

@ -22,6 +22,8 @@ Each special key that existed before api version 630 is its own module. These ar
#. ``\xff\xff/cluster_file_path`` - See :ref:`cluster file client access <cluster-file-client-access>`
#. ``\xff\xff/status/json`` - See :doc:`Machine-readable status <mr-status>`
#. ``\xff\xff/worker_interfaces`` - key as the worker's network address and value as the serialized ClientWorkerInterface, not transactional
Prior to api version 630, it was also possible to read a range starting at ``\xff\xff/worker_interfaces``. This is mostly an implementation detail of fdbcli,
but it's available in api version 630 as a module with prefix ``\xff\xff/worker_interfaces/``.
@ -210,6 +212,7 @@ that process, and wait for necessary data to be moved away.
#. ``\xff\xff/management/options/failed_locality/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/failed_locality/<locality>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
#. ``\xff\xff/management/tenant/map/<tenant>`` Read/write. Setting a key in this range to any value will result in a tenant being created with name ``<tenant>``. Clearing a key in this range will delete the tenant with name ``<tenant>``. Reading all or a portion of this range will return the list of tenants currently present in the cluster, excluding any changes in this transaction. Values read in this range will be JSON objects containing the metadata for the associated tenants.
#. ``\xff\xff/management/tenant/rename/<tenant>`` Read/write. Setting a key in this range to an unused tenant name will result in the tenant with the name ``<tenant>`` to be renamed to the value provided. If the rename operation is a transaction retried in a loop, it is possible for the rename to be applied twice, in which case ``tenant_not_found`` or ``tenant_already_exists`` errors may be returned. This can be avoided by checking for the tenant's existence first.
#. ``\xff\xff/management/options/worker_interfaces/verify`` Read/write. Setting this key will add a verification phase in reading ``\xff\xff/worker_interfaces``. Setting this key only has an effect in the current transaction and is not persisted on commit. Try to establish connections with every worker from the list returned by Cluster Controller and only return those workers that the client can connect to. This option is now only used in fdbcli commands ``kill``, ``suspend`` and ``expensive_data_check`` to populate the worker list.
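For bindings users, a hedged Java sketch of the same flow the fdbcli commands use (hypothetical; the option-setter name is assumed from the bindings' generated TransactionOptions, and ISO-8859-1 is used only to turn ``\xff`` characters into single bytes):

// Opt in to verification, then read the (verified) worker list in one transaction.
tr.options().setSpecialKeySpaceEnableWrites();
byte[] verifyKey = "\u00ff\u00ff/management/options/worker_interfaces/verify".getBytes(StandardCharsets.ISO_8859_1);
tr.set(verifyKey, new byte[0]);
byte[] begin = "\u00ff\u00ff/worker_interfaces/".getBytes(StandardCharsets.ISO_8859_1);
byte[] end = "\u00ff\u00ff/worker_interfaces0".getBytes(StandardCharsets.ISO_8859_1);
List<KeyValue> workers = tr.getRange(begin, end).asList().join();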
An exclusion is syntactically either an ip address (e.g. ``127.0.0.1``), or
an ip address and port (e.g. ``127.0.0.1:4500``) or any locality (e.g ``locality_dcid:primary-satellite`` or

View File

@ -23,6 +23,7 @@
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/IClientApi.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/NativeAPI.actor.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
@ -31,33 +32,6 @@
namespace {
// copy to standalones for krm
ACTOR Future<Void> setBlobRange(Database db, Key startKey, Key endKey, Value value) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
// FIXME: check that the set range is currently inactive, and that a revoked range is currently its own
// range in the map and fully set.
tr->set(blobRangeChangeKey, deterministicRandom()->randomUniqueID().toString());
// This is not coalescing because we want to keep each range logically separate.
wait(krmSetRange(tr, blobRangeKeys.begin, KeyRange(KeyRangeRef(startKey, endKey)), value));
wait(tr->commit());
printf("Successfully updated blob range [%s - %s) to %s\n",
startKey.printable().c_str(),
endKey.printable().c_str(),
value.printable().c_str());
return Void();
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<Version> getLatestReadVersion(Database db) {
state Transaction tr(db);
loop {
@ -99,65 +73,10 @@ ACTOR Future<Void> doBlobPurge(Database db, Key startKey, Key endKey, Optional<V
return Void();
}
ACTOR Future<Version> checkBlobSubrange(Database db, KeyRange keyRange, Optional<Version> version) {
state Transaction tr(db);
state Version readVersionOut = invalidVersion;
loop {
try {
wait(success(tr.readBlobGranules(keyRange, 0, version, &readVersionOut)));
return readVersionOut;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
ACTOR Future<Void> doBlobCheck(Database db, Key startKey, Key endKey, Optional<Version> version) {
state Transaction tr(db);
state Version readVersionOut = invalidVersion;
state double elapsed = -timer_monotonic();
state KeyRange range = KeyRange(KeyRangeRef(startKey, endKey));
state Standalone<VectorRef<KeyRangeRef>> allRanges;
loop {
try {
wait(store(allRanges, tr.getBlobGranuleRanges(range)));
break;
} catch (Error& e) {
wait(tr.onError(e));
}
}
if (allRanges.empty()) {
fmt::print("ERROR: No blob ranges for [{0} - {1})\n", startKey.printable(), endKey.printable());
return Void();
}
fmt::print("Loaded {0} blob ranges to check\n", allRanges.size());
state std::vector<Future<Version>> checkParts;
// Chunk up to smaller ranges than this limit. Must be smaller than BG_TOO_MANY_GRANULES to not hit the limit
int maxChunkSize = CLIENT_KNOBS->BG_TOO_MANY_GRANULES / 2;
KeyRange currentChunk;
int currentChunkSize = 0;
for (auto& it : allRanges) {
if (currentChunkSize == maxChunkSize) {
checkParts.push_back(checkBlobSubrange(db, currentChunk, version));
currentChunkSize = 0;
}
if (currentChunkSize == 0) {
currentChunk = it;
} else if (it.begin != currentChunk.end) {
fmt::print("ERROR: Blobrange check failed, gap in blob ranges from [{0} - {1})\n",
currentChunk.end.printable(),
it.begin.printable());
return Void();
} else {
currentChunk = KeyRangeRef(currentChunk.begin, it.end);
}
currentChunkSize++;
}
checkParts.push_back(checkBlobSubrange(db, currentChunk, version));
wait(waitForAll(checkParts));
readVersionOut = checkParts.back().get();
state Version readVersionOut = wait(db->verifyBlobRange(KeyRangeRef(startKey, endKey), version));
elapsed += timer_monotonic();
@ -201,7 +120,7 @@ ACTOR Future<bool> blobRangeCommandActor(Database localDb,
fmt::print("Invalid blob range [{0} - {1})\n", tokens[2].printable(), tokens[3].printable());
} else {
if (tokencmp(tokens[1], "start") || tokencmp(tokens[1], "stop")) {
bool starting = tokencmp(tokens[1], "start");
state bool starting = tokencmp(tokens[1], "start");
if (tokens.size() > 4) {
printUsage(tokens[0]);
return false;
@ -210,7 +129,19 @@ ACTOR Future<bool> blobRangeCommandActor(Database localDb,
starting ? "Starting" : "Stopping",
tokens[2].printable().c_str(),
tokens[3].printable().c_str());
wait(setBlobRange(localDb, begin, end, starting ? LiteralStringRef("1") : StringRef()));
state bool success = false;
if (starting) {
wait(store(success, localDb->blobbifyRange(KeyRangeRef(begin, end))));
} else {
wait(store(success, localDb->unblobbifyRange(KeyRangeRef(begin, end))));
}
if (!success) {
fmt::print("{0} blobbify range for [{1} - {2}) failed\n",
starting ? "Starting" : "Stopping",
tokens[2].printable().c_str(),
tokens[3].printable().c_str());
}
return success;
} else if (tokencmp(tokens[1], "purge") || tokencmp(tokens[1], "forcepurge") || tokencmp(tokens[1], "check")) {
bool purge = tokencmp(tokens[1], "purge") || tokencmp(tokens[1], "forcepurge");
bool forcePurge = tokencmp(tokens[1], "forcepurge");

View File

@ -46,7 +46,7 @@ ACTOR Future<bool> expensiveDataCheckCommandActor(
if (tokens.size() == 1) {
// initialize worker interfaces
address_interface->clear();
wait(getWorkerInterfaces(tr, address_interface));
wait(getWorkerInterfaces(tr, address_interface, true));
}
if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
if (address_interface->size() == 0) {

View File

@ -44,7 +44,7 @@ ACTOR Future<bool> killCommandActor(Reference<IDatabase> db,
if (tokens.size() == 1) {
// initialize worker interfaces
address_interface->clear();
wait(getWorkerInterfaces(tr, address_interface));
wait(getWorkerInterfaces(tr, address_interface, true));
}
if (tokens.size() == 1 || tokencmp(tokens[1], "list")) {
if (address_interface->size() == 0) {

View File

@ -43,7 +43,7 @@ ACTOR Future<bool> suspendCommandActor(Reference<IDatabase> db,
if (tokens.size() == 1) {
// initialize worker interfaces
address_interface->clear();
wait(getWorkerInterfaces(tr, address_interface));
wait(getWorkerInterfaces(tr, address_interface, true));
if (address_interface->size() == 0) {
printf("\nNo addresses can be suspended.\n");
} else if (address_interface->size() == 1) {

View File

@ -62,56 +62,52 @@ ACTOR Future<std::string> getSpecialKeysFailureErrorMessage(Reference<ITransacti
return valueObj["message"].get_str();
}
ACTOR Future<Void> verifyAndAddInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv) {
wait(connectLock->take());
state FlowLock::Releaser releaser(*connectLock);
state ClientWorkerInterface workerInterf;
try {
// the interface is backward compatible, so if parsing fails the CLI version is too old and must be updated
workerInterf = BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
} catch (Error& e) {
fprintf(stderr, "Error: %s; CLI version is too old, please update to use a newer version\n", e.what());
return Void();
}
state ClientLeaderRegInterface leaderInterf(workerInterf.address());
choose {
when(Optional<LeaderInfo> rep =
wait(brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())))) {
StringRef ip_port =
(kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key)
.removePrefix(LiteralStringRef("\xff\xff/worker_interfaces/"));
(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);
if (workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
Key full_ip_port2 =
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls"))
? full_ip_port2.removeSuffix(LiteralStringRef(":tls"))
: full_ip_port2;
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
}
void addInterfacesFromKVs(RangeResult& kvs,
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface) {
for (const auto& kv : kvs) {
ClientWorkerInterface workerInterf;
try {
// the interface is backward compatible, so if parsing fails the CLI version is too old and must be updated
workerInterf = BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
} catch (Error& e) {
fprintf(stderr, "Error: %s; CLI version is too old, please update to use a newer version\n", e.what());
return;
}
ClientLeaderRegInterface leaderInterf(workerInterf.address());
StringRef ip_port =
(kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key)
.removePrefix(LiteralStringRef("\xff\xff/worker_interfaces/"));
(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);
if (workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
Key full_ip_port2 =
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls"))
? full_ip_port2.removeSuffix(LiteralStringRef(":tls"))
: full_ip_port2;
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
}
when(wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT))) {}
}
return Void();
}
ACTOR Future<Void> getWorkerInterfaces(Reference<ITransaction> tr,
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface) {
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
bool verify) {
if (verify) {
tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
tr->set(workerInterfacesVerifyOptionSpecialKey, ValueRef());
}
// Hold the reference to the standalone's memory
state ThreadFuture<RangeResult> kvsFuture = tr->getRange(
KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")),
CLIENT_KNOBS->TOO_MANY);
RangeResult kvs = wait(safeThreadFutureToFuture(kvsFuture));
state RangeResult kvs = wait(safeThreadFutureToFuture(kvsFuture));
ASSERT(!kvs.more);
auto connectLock = makeReference<FlowLock>(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM);
std::vector<Future<Void>> addInterfs;
for (auto it : kvs) {
addInterfs.push_back(verifyAndAddInterface(address_interface, connectLock, it));
if (verify) {
// remove the option if set
tr->clear(workerInterfacesVerifyOptionSpecialKey);
}
wait(waitForAll(addInterfs));
addInterfacesFromKVs(kvs, address_interface);
return Void();
}

View File

@ -1050,36 +1050,6 @@ Future<T> stopNetworkAfter(Future<T> what) {
}
}
ACTOR Future<Void> addInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv) {
wait(connectLock->take());
state FlowLock::Releaser releaser(*connectLock);
state ClientWorkerInterface workerInterf =
BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
state ClientLeaderRegInterface leaderInterf(workerInterf.address());
choose {
when(Optional<LeaderInfo> rep =
wait(brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())))) {
StringRef ip_port =
(kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key)
.removePrefix(LiteralStringRef("\xff\xff/worker_interfaces/"));
(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);
if (workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
Key full_ip_port2 =
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls"))
? full_ip_port2.removeSuffix(LiteralStringRef(":tls"))
: full_ip_port2;
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
}
}
when(wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT))) {}
}
return Void();
}
ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
state LineNoise& linenoise = *plinenoise;
state bool intrans = false;

View File

@ -120,6 +120,7 @@ extern const KeyRangeRef processClassSourceSpecialKeyRange;
extern const KeyRangeRef processClassTypeSpecialKeyRange;
// Other special keys
inline const KeyRef errorMsgSpecialKey = LiteralStringRef("\xff\xff/error_message");
inline const KeyRef workerInterfacesVerifyOptionSpecialKey = "\xff\xff/management/options/worker_interfaces/verify"_sr;
// help functions (Copied from fdbcli.actor.cpp)
// get all workers' info
@ -132,13 +133,14 @@ void printUsage(StringRef command);
// Pre: tr failed with special_keys_api_failure error
// Read the error message special key and return the message
ACTOR Future<std::string> getSpecialKeysFailureErrorMessage(Reference<ITransaction> tr);
// Using \xff\xff/worker_interfaces/ special key, get all worker interfaces
// Using \xff\xff/worker_interfaces/ special key, get all worker interfaces.
// A worker list will be returned from CC.
// If verify, we will try to establish connections to all workers returned.
// In particular, it will deserialize \xff\xff/worker_interfaces/<address>:=<ClientInterface> kv pairs and issue RPC
// calls, then only return interfaces (kv pairs) the client can talk to
ACTOR Future<Void> getWorkerInterfaces(Reference<ITransaction> tr,
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface);
// Deserialize \xff\xff/worker_interfaces/<address>:=<ClientInterface> k-v pair and verify by a RPC call
ACTOR Future<Void> verifyAndAddInterface(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
Reference<FlowLock> connectLock,
KeyValue kv);
std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
bool verify = false);
// print cluster status info
void printStatus(StatusObjectReader statusObj,
StatusClient::StatusLevel level,

View File

@ -40,6 +40,7 @@
#include <cstring>
#include <fstream> // for perf microbenchmark
#include <limits>
#include <vector>
#define BG_READ_DEBUG false
@ -209,16 +210,21 @@ namespace {
BlobGranuleFileEncryptionKeys getEncryptBlobCipherKey(const BlobGranuleCipherKeysCtx cipherKeysCtx) {
BlobGranuleFileEncryptionKeys eKeys;
// Cipher key reconstructed is 'never' inserted into BlobCipherKey cache, choose 'neverExpire'
eKeys.textCipherKey = makeReference<BlobCipherKey>(cipherKeysCtx.textCipherKey.encryptDomainId,
cipherKeysCtx.textCipherKey.baseCipherId,
cipherKeysCtx.textCipherKey.baseCipher.begin(),
cipherKeysCtx.textCipherKey.baseCipher.size(),
cipherKeysCtx.textCipherKey.salt);
cipherKeysCtx.textCipherKey.salt,
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
eKeys.headerCipherKey = makeReference<BlobCipherKey>(cipherKeysCtx.headerCipherKey.encryptDomainId,
cipherKeysCtx.headerCipherKey.baseCipherId,
cipherKeysCtx.headerCipherKey.baseCipher.begin(),
cipherKeysCtx.headerCipherKey.baseCipher.size(),
cipherKeysCtx.headerCipherKey.salt);
cipherKeysCtx.headerCipherKey.salt,
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
return eKeys;
}

View File

@ -60,6 +60,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( WRONG_SHARD_SERVER_DELAY, .01 ); if( randomize && BUGGIFY ) WRONG_SHARD_SERVER_DELAY = deterministicRandom()->random01(); // FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test)
init( FUTURE_VERSION_RETRY_DELAY, .01 ); if( randomize && BUGGIFY ) FUTURE_VERSION_RETRY_DELAY = deterministicRandom()->random01();// FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY;
init( GRV_ERROR_RETRY_DELAY, 5.0 ); if( randomize && BUGGIFY ) GRV_ERROR_RETRY_DELAY = 0.01 + 5 * deterministicRandom()->random01();
init( UNKNOWN_TENANT_RETRY_DELAY, 0.0 ); if( randomize && BUGGIFY ) UNKNOWN_TENANT_RETRY_DELAY = deterministicRandom()->random01();
init( REPLY_BYTE_LIMIT, 80000 );
init( DEFAULT_BACKOFF, .01 ); if( randomize && BUGGIFY ) DEFAULT_BACKOFF = deterministicRandom()->random01();

View File

@ -663,69 +663,43 @@ ACTOR Future<Void> asyncDeserializeClusterInterface(Reference<AsyncVar<Value>> s
}
}
struct ClientStatusStats {
int count;
std::vector<std::pair<NetworkAddress, Key>> examples;
namespace {
ClientStatusStats() : count(0) { examples.reserve(CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT); }
};
void tryInsertIntoSamples(OpenDatabaseRequest::Samples& samples,
const NetworkAddress& networkAddress,
const Key& traceLogGroup) {
++samples.count;
if (samples.samples.size() < static_cast<size_t>(CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT)) {
samples.samples.insert({ networkAddress, traceLogGroup });
}
}
} // namespace
OpenDatabaseRequest ClientData::getRequest() {
	OpenDatabaseRequest req;

	std::map<StringRef, ClientStatusStats> issueMap;
	std::map<ClientVersionRef, ClientStatusStats> versionMap;
	std::map<StringRef, ClientStatusStats> maxProtocolMap;
	int clientCount = 0;

	// SOMEDAY: add a yield in this loop
	for (auto& ci : clientStatusInfoMap) {
		for (auto& it : ci.second.issues) {
			auto& entry = issueMap[it];
			entry.count++;
			if (entry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
				entry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
			}
		}
		if (ci.second.versions.size()) {
			clientCount++;
			StringRef maxProtocol;
			for (auto& it : ci.second.versions) {
				maxProtocol = std::max(maxProtocol, it.protocolVersion);
				auto& entry = versionMap[it];
				entry.count++;
				if (entry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
					entry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
				}
			}
			auto& maxEntry = maxProtocolMap[maxProtocol];
			maxEntry.count++;
			if (maxEntry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
				maxEntry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
			}
		} else {
			auto& entry = versionMap[ClientVersionRef()];
			entry.count++;
			if (entry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
				entry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
			}
		}
	}

	req.issues.reserve(issueMap.size());
	for (auto& it : issueMap) {
		req.issues.push_back(ItemWithExamples<Key>(it.first, it.second.count, it.second.examples));
	}
	req.supportedVersions.reserve(versionMap.size());
	for (auto& it : versionMap) {
		req.supportedVersions.push_back(
		    ItemWithExamples<Standalone<ClientVersionRef>>(it.first, it.second.count, it.second.examples));
	}
	req.maxProtocolSupported.reserve(maxProtocolMap.size());
	for (auto& it : maxProtocolMap) {
		req.maxProtocolSupported.push_back(ItemWithExamples<Key>(it.first, it.second.count, it.second.examples));
	}
	req.clientCount = clientCount;

	return req;
}
OpenDatabaseRequest ClientData::getRequest() {
	OpenDatabaseRequest req;

	// SOMEDAY: add a yield in this loop
	for (auto& ci : clientStatusInfoMap) {
		const auto& networkAddress = ci.first;
		const auto& traceLogGroup = ci.second.traceLogGroup;

		for (auto& issue : ci.second.issues) {
			tryInsertIntoSamples(req.issues[issue], networkAddress, traceLogGroup);
		}

		if (!ci.second.versions.size()) {
			tryInsertIntoSamples(req.supportedVersions[ClientVersionRef()], networkAddress, traceLogGroup);
			continue;
		}

		++req.clientCount;
		StringRef maxProtocol;
		for (auto& it : ci.second.versions) {
			maxProtocol = std::max(maxProtocol, it.protocolVersion);
			tryInsertIntoSamples(req.supportedVersions[it], networkAddress, traceLogGroup);
		}
		tryInsertIntoSamples(req.maxProtocolSupported[maxProtocol], networkAddress, traceLogGroup);
	}

	return req;
}

View File

@ -257,13 +257,14 @@ ThreadFuture<Standalone<VectorRef<KeyRef>>> DLTransaction::getRangeSplitPoints(c
});
}
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> DLTransaction::getBlobGranuleRanges(const KeyRangeRef& keyRange) {
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> DLTransaction::getBlobGranuleRanges(const KeyRangeRef& keyRange,
int rangeLimit) {
if (!api->transactionGetBlobGranuleRanges) {
return unsupported_operation();
}
FdbCApi::FDBFuture* f = api->transactionGetBlobGranuleRanges(
tr, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size());
tr, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size(), rangeLimit);
return toThreadFuture<Standalone<VectorRef<KeyRangeRef>>>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
const FdbCApi::FDBKeyRange* keyRanges;
int keyRangesLength;
@ -583,6 +584,71 @@ ThreadFuture<Void> DLDatabase::waitPurgeGranulesComplete(const KeyRef& purgeKey)
return toThreadFuture<Void>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { return Void(); });
}
ThreadFuture<bool> DLDatabase::blobbifyRange(const KeyRangeRef& keyRange) {
if (!api->databaseBlobbifyRange) {
return unsupported_operation();
}
FdbCApi::FDBFuture* f = api->databaseBlobbifyRange(
db, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size());
return toThreadFuture<bool>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
bool ret = false;
ASSERT(!api->futureGetBool(f, &ret));
return ret;
});
}
ThreadFuture<bool> DLDatabase::unblobbifyRange(const KeyRangeRef& keyRange) {
if (!api->databaseUnblobbifyRange) {
return unsupported_operation();
}
FdbCApi::FDBFuture* f = api->databaseUnblobbifyRange(
db, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size());
return toThreadFuture<bool>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
bool ret = false;
ASSERT(!api->futureGetBool(f, &ret));
return ret;
});
}
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> DLDatabase::listBlobbifiedRanges(const KeyRangeRef& keyRange,
int rangeLimit) {
if (!api->databaseListBlobbifiedRanges) {
return unsupported_operation();
}
FdbCApi::FDBFuture* f = api->databaseListBlobbifiedRanges(
db, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size(), rangeLimit);
return toThreadFuture<Standalone<VectorRef<KeyRangeRef>>>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
const FdbCApi::FDBKeyRange* keyRanges;
int keyRangesLength;
FdbCApi::fdb_error_t error = api->futureGetKeyRangeArray(f, &keyRanges, &keyRangesLength);
ASSERT(!error);
// The memory for this is stored in the FDBFuture and is released when the future gets destroyed.
return Standalone<VectorRef<KeyRangeRef>>(VectorRef<KeyRangeRef>((KeyRangeRef*)keyRanges, keyRangesLength),
Arena());
});
}
ThreadFuture<Version> DLDatabase::verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) {
if (!api->databaseVerifyBlobRange) {
return unsupported_operation();
}
	FdbCApi::FDBFuture* f = api->databaseVerifyBlobRange(db,
	                                                     keyRange.begin.begin(),
	                                                     keyRange.begin.size(),
	                                                     keyRange.end.begin(),
	                                                     keyRange.end.size(),
	                                                     version.orDefault(latestVersion));
return toThreadFuture<Version>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
Version version = invalidVersion;
ASSERT(!api->futureGetInt64(f, &version));
return version;
});
}
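Taken together, the DL-layer methods above just forward to the four new C entry points and decode the result with the new bool accessor. A minimal end-to-end sketch of driving one of them from a client, assuming the standard fdb_c bootstrap at API version 720; the key choices and the thin error handling are illustrative, not part of this change:

#define FDB_API_VERSION 720
#include <foundationdb/fdb_c.h>
#include <cstdio>
#include <thread>

// Resolves a future and folds its two failure modes into one error code.
static fdb_error_t wait_future(FDBFuture* f) {
	fdb_error_t err = fdb_future_block_until_ready(f);
	return err ? err : fdb_future_get_error(f);
}

int main() {
	fdb_select_api_version(FDB_API_VERSION);
	fdb_setup_network();
	std::thread net([] { fdb_run_network(); });

	FDBDatabase* db = nullptr;
	fdb_create_database(nullptr /* default cluster file */, &db);

	const uint8_t begin[] = { 'a' };
	const uint8_t end[] = { 'b' };
	FDBFuture* f = fdb_database_blobbify_range(db, begin, 1, end, 1);
	if (!wait_future(f)) {
		fdb_bool_t ok = 0;
		fdb_future_get_bool(f, &ok); // bool accessor introduced by this change
		printf("blobbify [a - b): %s\n", ok ? "accepted" : "rejected");
	}
	fdb_future_destroy(f);

	fdb_database_destroy(db);
	fdb_stop_network();
	net.join();
	return 0;
}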
// DLApi
// Loads the specified function from a dynamic library
@ -670,6 +736,13 @@ void DLApi::init() {
fdbCPath,
"fdb_database_wait_purge_granules_complete",
headerVersion >= 710);
loadClientFunction(&api->databaseBlobbifyRange, lib, fdbCPath, "fdb_database_blobbify_range", headerVersion >= 720);
loadClientFunction(
&api->databaseUnblobbifyRange, lib, fdbCPath, "fdb_database_unblobbify_range", headerVersion >= 720);
loadClientFunction(
&api->databaseListBlobbifiedRanges, lib, fdbCPath, "fdb_database_list_blobbified_ranges", headerVersion >= 720);
loadClientFunction(
&api->databaseVerifyBlobRange, lib, fdbCPath, "fdb_database_verify_blob_range", headerVersion >= 720);
loadClientFunction(
&api->tenantCreateTransaction, lib, fdbCPath, "fdb_tenant_create_transaction", headerVersion >= 710);
@ -744,6 +817,7 @@ void DLApi::init() {
fdbCPath,
headerVersion >= 620 ? "fdb_future_get_int64" : "fdb_future_get_version",
headerVersion >= 0);
loadClientFunction(&api->futureGetBool, lib, fdbCPath, "fdb_future_get_bool", headerVersion >= 720);
loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64", headerVersion >= 700);
loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error", headerVersion >= 0);
loadClientFunction(&api->futureGetKey, lib, fdbCPath, "fdb_future_get_key", headerVersion >= 0);
@ -1079,9 +1153,10 @@ ThreadFuture<Standalone<VectorRef<KeyRef>>> MultiVersionTransaction::getRangeSpl
}
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> MultiVersionTransaction::getBlobGranuleRanges(
const KeyRangeRef& keyRange) {
const KeyRangeRef& keyRange,
int rangeLimit) {
auto tr = getTransaction();
auto f = tr.transaction ? tr.transaction->getBlobGranuleRanges(keyRange)
auto f = tr.transaction ? tr.transaction->getBlobGranuleRanges(keyRange, rangeLimit)
: makeTimeout<Standalone<VectorRef<KeyRangeRef>>>();
return abortableFuture(f, tr.onChange);
}
@ -1589,6 +1664,32 @@ ThreadFuture<Void> MultiVersionDatabase::waitPurgeGranulesComplete(const KeyRef&
return abortableFuture(f, dbState->dbVar->get().onChange);
}
ThreadFuture<bool> MultiVersionDatabase::blobbifyRange(const KeyRangeRef& keyRange) {
auto dbVar = dbState->dbVar->get();
auto f = dbVar.value ? dbVar.value->blobbifyRange(keyRange) : ThreadFuture<bool>(Never());
return abortableFuture(f, dbVar.onChange);
}
ThreadFuture<bool> MultiVersionDatabase::unblobbifyRange(const KeyRangeRef& keyRange) {
auto dbVar = dbState->dbVar->get();
auto f = dbVar.value ? dbVar.value->unblobbifyRange(keyRange) : ThreadFuture<bool>(Never());
return abortableFuture(f, dbVar.onChange);
}
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> MultiVersionDatabase::listBlobbifiedRanges(const KeyRangeRef& keyRange,
int rangeLimit) {
auto dbVar = dbState->dbVar->get();
auto f = dbVar.value ? dbVar.value->listBlobbifiedRanges(keyRange, rangeLimit)
: ThreadFuture<Standalone<VectorRef<KeyRangeRef>>>(Never());
return abortableFuture(f, dbVar.onChange);
}
ThreadFuture<Version> MultiVersionDatabase::verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) {
auto dbVar = dbState->dbVar->get();
auto f = dbVar.value ? dbVar.value->verifyBlobRange(keyRange, version) : ThreadFuture<Version>(Never());
return abortableFuture(f, dbVar.onChange);
}
// Returns the protocol version reported by the coordinator this client is connected to
// If an expected version is given, the future won't return until the protocol version is different than expected
// Note: this will never return if the server is running a protocol from FDB 5.0 or older

View File

@ -1279,32 +1279,6 @@ void DatabaseContext::registerSpecialKeysImpl(SpecialKeySpace::MODULE module,
ACTOR Future<RangeResult> getWorkerInterfaces(Reference<IClusterConnectionRecord> clusterRecord);
ACTOR Future<Optional<Value>> getJSON(Database db);
struct WorkerInterfacesSpecialKeyImpl : SpecialKeyRangeReadImpl {
Future<RangeResult> getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const override {
if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionRecord()) {
Key prefix = Key(getKeyRange().begin);
return map(getWorkerInterfaces(ryw->getDatabase()->getConnectionRecord()),
[prefix = prefix, kr = KeyRange(kr)](const RangeResult& in) {
RangeResult result;
for (const auto& [k_, v] : in) {
auto k = k_.withPrefix(prefix);
if (kr.contains(k))
result.push_back_deep(result.arena(), KeyValueRef(k, v));
}
std::sort(result.begin(), result.end(), KeyValueRef::OrderByKey{});
return result;
});
} else {
return RangeResult();
}
}
explicit WorkerInterfacesSpecialKeyImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
};
struct SingleSpecialKeyImpl : SpecialKeyRangeReadImpl {
Future<RangeResult> getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
@ -3535,8 +3509,8 @@ ACTOR Future<Key> getKey(Reference<TransactionState> trState,
ACTOR Future<Version> waitForCommittedVersion(Database cx, Version version, SpanContext spanContext) {
state Span span("NAPI:waitForCommittedVersion"_loc, spanContext);
try {
loop {
loop {
try {
choose {
when(wait(cx->onProxiesChanged())) {}
when(GetReadVersionReply v = wait(basicLoadBalance(
@ -3562,10 +3536,16 @@ ACTOR Future<Version> waitForCommittedVersion(Database cx, Version version, Span
wait(delay(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, cx->taskID));
}
}
} catch (Error& e) {
if (e.code() == error_code_batch_transaction_throttled ||
e.code() == error_code_grv_proxy_memory_limit_exceeded) {
// GRV Proxy returns an error
wait(delayJittered(CLIENT_KNOBS->GRV_ERROR_RETRY_DELAY));
} else {
TraceEvent(SevError, "WaitForCommittedVersionError").error(e);
throw;
}
}
} catch (Error& e) {
TraceEvent(SevError, "WaitForCommittedVersionError").error(e);
throw;
}
}
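The retry branch added above (and mirrored in the backup worker later in this diff) converts two transient GRV failures into a jittered sleep instead of a client-visible error. A standalone sketch of that backoff shape, with std:: primitives standing in for flow's delayJittered (whose exact jitter window is flow's choice) and illustrative error codes:

#include <chrono>
#include <random>
#include <thread>

enum class Err { None, BatchThrottled, GrvProxyMemoryLimit, Fatal };

// Sleeps for a randomized multiple of the base delay so that many blocked
// clients do not all wake at the same instant.
void delayJittered(double seconds) {
	static thread_local std::mt19937 rng{ std::random_device{}() };
	std::uniform_real_distribution<double> jitter(0.5, 1.5);
	std::this_thread::sleep_for(std::chrono::duration<double>(seconds * jitter(rng)));
}

template <class Op>
void runWithGrvRetry(Op op, double grvErrorRetryDelay /* GRV_ERROR_RETRY_DELAY */) {
	for (;;) {
		Err e = op();
		if (e == Err::None)
			return;
		if (e == Err::BatchThrottled || e == Err::GrvProxyMemoryLimit)
			delayJittered(grvErrorRetryDelay); // transient: back off and retry
		else
			throw e; // everything else still propagates to the caller
	}
}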
@ -6774,9 +6754,12 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanContext parentSpa
}
}
} catch (Error& e) {
if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled &&
e.code() != error_code_grv_proxy_memory_limit_exceeded)
TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
if (e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) {
if ((e.code() == error_code_batch_transaction_throttled ||
e.code() == error_code_grv_proxy_memory_limit_exceeded) &&
!cx->apiVersionAtLeast(630)) {
wait(delayJittered(5.0));
} else {
throw;
@ -7655,7 +7638,9 @@ Future<Standalone<VectorRef<KeyRef>>> Transaction::getRangeSplitPoints(KeyRange
// the blob granule requests are a bit funky because they piggyback off the existing transaction to read from the system
// keyspace
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRangesActor(Transaction* self, KeyRange keyRange) {
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRangesActor(Transaction* self,
KeyRange keyRange,
int rangeLimit) {
// FIXME: use streaming range read
state KeyRange currentRange = keyRange;
state Standalone<VectorRef<KeyRangeRef>> results;
@ -7678,7 +7663,7 @@ ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRangesActor(Trans
		// basically krmGetRange, but modified to work without RAW_ACCESS on a tenant by bypassing the tenant via a
		// manual getRange with UseTenant::False
GetRangeLimits limits(1000);
GetRangeLimits limits(2 * rangeLimit + 2);
limits.minRows = 2;
RangeResult rawMapping = wait(getRange(self->trState,
self->getReadVersion(),
@ -7700,6 +7685,9 @@ ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRangesActor(Trans
if (blobGranuleMapping[i].value.size()) {
results.push_back(results.arena(),
KeyRangeRef(blobGranuleMapping[i].key, blobGranuleMapping[i + 1].key));
if (results.size() == rangeLimit) {
return results;
}
}
}
results.arena().dependsOn(blobGranuleMapping.arena());
@ -7711,8 +7699,8 @@ ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRangesActor(Trans
}
}
Future<Standalone<VectorRef<KeyRangeRef>>> Transaction::getBlobGranuleRanges(const KeyRange& range) {
return ::getBlobGranuleRangesActor(this, range);
Future<Standalone<VectorRef<KeyRangeRef>>> Transaction::getBlobGranuleRanges(const KeyRange& range, int rangeLimit) {
return ::getBlobGranuleRangesActor(this, range, rangeLimit);
}
// hack (for now) to get blob worker interface into load balance
@ -8024,6 +8012,71 @@ ACTOR Future<Version> setPerpetualStorageWiggle(Database cx, bool enable, LockAw
return version;
}
ACTOR Future<Version> checkBlobSubrange(Database db, KeyRange keyRange, Optional<Version> version) {
state Transaction tr(db);
state Version readVersionOut = invalidVersion;
loop {
try {
wait(success(tr.readBlobGranules(keyRange, 0, version, &readVersionOut)));
return readVersionOut;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
ACTOR Future<Version> verifyBlobRangeActor(Reference<DatabaseContext> cx, KeyRange range, Optional<Version> version) {
state Database db(cx);
state Transaction tr(db);
state Standalone<VectorRef<KeyRangeRef>> allRanges;
state KeyRange curRegion = KeyRangeRef(range.begin, range.begin);
state Version readVersionOut = invalidVersion;
state int batchSize = CLIENT_KNOBS->BG_TOO_MANY_GRANULES / 2;
loop {
try {
wait(store(allRanges, tr.getBlobGranuleRanges(KeyRangeRef(curRegion.begin, range.end), 20 * batchSize)));
		} catch (Error& e) {
			wait(tr.onError(e));
			// retry the read rather than falling through with a stale allRanges from the previous iteration
			continue;
		}
if (allRanges.empty()) {
if (curRegion.begin < range.end) {
return invalidVersion;
}
return readVersionOut;
}
state std::vector<Future<Version>> checkParts;
// Chunk up to smaller ranges than this limit. Must be smaller than BG_TOO_MANY_GRANULES to not hit the limit
int batchCount = 0;
for (auto& it : allRanges) {
if (it.begin != curRegion.end) {
return invalidVersion;
}
curRegion = KeyRangeRef(curRegion.begin, it.end);
batchCount++;
if (batchCount == batchSize) {
checkParts.push_back(checkBlobSubrange(db, curRegion, version));
batchCount = 0;
curRegion = KeyRangeRef(curRegion.end, curRegion.end);
}
}
if (!curRegion.empty()) {
checkParts.push_back(checkBlobSubrange(db, curRegion, version));
}
wait(waitForAll(checkParts));
readVersionOut = checkParts.back().get();
curRegion = KeyRangeRef(curRegion.end, curRegion.end);
}
}
Future<Version> DatabaseContext::verifyBlobRange(const KeyRange& range, Optional<Version> version) {
return verifyBlobRangeActor(Reference<DatabaseContext>::addRef(this), range, version);
}
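verifyBlobRangeActor splits the work so that no single readBlobGranules call covers more than BG_TOO_MANY_GRANULES / 2 granules, while each listing fetches twenty batches' worth of ranges at once. A standalone sketch of the chunking, with strings standing in for keys and checkChunk standing in for checkBlobSubrange:

#include <functional>
#include <string>
#include <vector>

struct Range {
	std::string begin, end;
};

// Coalesces contiguous granule ranges into chunks of at most batchSize
// granules and hands each chunk to checkChunk independently.
void forEachBatch(const std::vector<Range>& granules,
                  int batchSize,
                  const std::function<void(const Range&)>& checkChunk) {
	Range cur{ "", "" };
	int count = 0;
	for (const auto& g : granules) {
		cur = count == 0 ? g : Range{ cur.begin, g.end }; // start or extend the chunk
		if (++count == batchSize) {
			checkChunk(cur);
			count = 0;
		}
	}
	if (count > 0)
		checkChunk(cur); // flush the final partial chunk
}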
ACTOR Future<std::vector<std::pair<UID, StorageWiggleValue>>> readStorageWiggleValues(Database cx,
bool primary,
bool use_system_priority) {
@ -9733,6 +9786,7 @@ Reference<DatabaseContext::TransactionT> DatabaseContext::createTransaction() {
return makeReference<ReadYourWritesTransaction>(Database(Reference<DatabaseContext>::addRef(this)));
}
// BlobGranule API.
ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
KeyRange range,
Version purgeVersion,
@ -9824,6 +9878,89 @@ Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
}
ACTOR Future<bool> setBlobRangeActor(Reference<DatabaseContext> cx, KeyRange range, bool active) {
state Database db(cx);
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
state Value value = active ? blobRangeActive : blobRangeInactive;
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
if (active) {
state RangeResult results = wait(krmGetRanges(tr, blobRangeKeys.begin, range));
ASSERT(results.size() >= 2);
if (results[0].key == range.begin && results[1].key == range.end &&
results[0].value == blobRangeActive) {
return true;
} else {
for (int i = 0; i < results.size(); i++) {
if (results[i].value == blobRangeActive) {
return false;
}
}
}
}
tr->set(blobRangeChangeKey, deterministicRandom()->randomUniqueID().toString());
// This is not coalescing because we want to keep each range logically separate.
wait(krmSetRange(tr, blobRangeKeys.begin, range, value));
wait(tr->commit());
printf("Successfully updated blob range [%s - %s) to %s\n",
range.begin.printable().c_str(),
range.end.printable().c_str(),
value.printable().c_str());
return true;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
Future<bool> DatabaseContext::blobbifyRange(KeyRange range) {
return setBlobRangeActor(Reference<DatabaseContext>::addRef(this), range, true);
}
Future<bool> DatabaseContext::unblobbifyRange(KeyRange range) {
return setBlobRangeActor(Reference<DatabaseContext>::addRef(this), range, false);
}
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRangesActor(Reference<DatabaseContext> cx,
KeyRange range,
int rangeLimit) {
state Database db(cx);
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
state Standalone<VectorRef<KeyRangeRef>> blobRanges;
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
state RangeResult results = wait(krmGetRanges(tr, blobRangeKeys.begin, range, 2 * rangeLimit + 2));
blobRanges.arena().dependsOn(results.arena());
for (int i = 0; i < results.size() - 1; i++) {
				if (results[i].value == blobRangeActive) {
					blobRanges.push_back(blobRanges.arena(), KeyRangeRef(results[i].key, results[i + 1].key));
}
if (blobRanges.size() == rangeLimit) {
return blobRanges;
}
}
return blobRanges;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
Future<Standalone<VectorRef<KeyRangeRef>>> DatabaseContext::listBlobbifiedRanges(KeyRange range, int rangeLimit) {
	return listBlobbifiedRangesActor(Reference<DatabaseContext>::addRef(this), range, rangeLimit);
}
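listBlobbifiedRangesActor decodes krm ("key-range map") boundary rows: each row is a boundary key plus the value that applies from that boundary to the next one, so one blobbified range consumes at most two rows and 2 * rangeLimit + 2 rows always suffice for rangeLimit ranges. A standalone sketch of the decode:

#include <string>
#include <utility>
#include <vector>

using Boundary = std::pair<std::string, std::string>; // boundary key, value
using Range = std::pair<std::string, std::string>;    // [begin, end)

// Emits a range for every boundary marked active ("1"), ending at the next boundary.
std::vector<Range> decodeActiveRanges(const std::vector<Boundary>& rows, size_t rangeLimit) {
	std::vector<Range> out;
	for (size_t i = 0; i + 1 < rows.size() && out.size() < rangeLimit; i++) {
		if (rows[i].second == "1")
			out.push_back({ rows[i].first, rows[i + 1].first });
	}
	return out;
}

// decodeActiveRanges({ {"a","1"}, {"b","0"}, {"c","1"}, {"d","0"} }, 10)
//   yields { [a,b), [c,d) }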
int64_t getMaxKeySize(KeyRef const& key) {
return getMaxWriteKeySize(key, true);
}

View File

@ -1783,7 +1783,8 @@ Future<Standalone<VectorRef<KeyRef>>> ReadYourWritesTransaction::getRangeSplitPo
return waitOrError(tr.getRangeSplitPoints(range, chunkSize), resetPromise.getFuture());
}
Future<Standalone<VectorRef<KeyRangeRef>>> ReadYourWritesTransaction::getBlobGranuleRanges(const KeyRange& range) {
Future<Standalone<VectorRef<KeyRangeRef>>> ReadYourWritesTransaction::getBlobGranuleRanges(const KeyRange& range,
int rangeLimit) {
if (checkUsedDuringCommit()) {
return used_during_commit();
}
@ -1794,7 +1795,7 @@ Future<Standalone<VectorRef<KeyRangeRef>>> ReadYourWritesTransaction::getBlobGra
if (range.begin > maxKey || range.end > maxKey)
return key_outside_legal_range();
return waitOrError(tr.getBlobGranuleRanges(range), resetPromise.getFuture());
return waitOrError(tr.getBlobGranuleRanges(range, rangeLimit), resetPromise.getFuture());
}
Future<Standalone<VectorRef<BlobGranuleChunkRef>>> ReadYourWritesTransaction::readBlobGranules(

View File

@ -133,7 +133,8 @@ std::unordered_map<std::string, KeyRange> SpecialKeySpace::actorLineageApiComman
std::set<std::string> SpecialKeySpace::options = { "excluded/force",
"failed/force",
"excluded_locality/force",
"failed_locality/force" };
"failed_locality/force",
"worker_interfaces/verify" };
std::set<std::string> SpecialKeySpace::tracingOptions = { kTracingTransactionIdKey, kTracingTokenKey };
@ -2754,6 +2755,64 @@ Future<Optional<std::string>> FailedLocalitiesRangeImpl::commit(ReadYourWritesTr
return excludeLocalityCommitActor(ryw, true);
}
// Defined in ReadYourWrites.actor.cpp
ACTOR Future<RangeResult> getWorkerInterfaces(Reference<IClusterConnectionRecord> clusterRecord);
// Defined in NativeAPI.actor.cpp
ACTOR Future<bool> verifyInterfaceActor(Reference<FlowLock> connectLock, ClientWorkerInterface workerInterf);
ACTOR static Future<RangeResult> workerInterfacesImplGetRangeActor(ReadYourWritesTransaction* ryw,
KeyRef prefix,
KeyRangeRef kr) {
if (!ryw->getDatabase().getPtr() || !ryw->getDatabase()->getConnectionRecord())
return RangeResult();
state RangeResult interfs = wait(getWorkerInterfaces(ryw->getDatabase()->getConnectionRecord()));
// for options' special keys, the boolean flag indicates if it's a SET operation
auto [verify, _] = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandOptionSpecialKey(
"worker_interfaces", "verify")];
state RangeResult result;
if (verify) {
// if the verify option is set, we try to talk to every worker and only return those we can reach
Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
state std::vector<Future<bool>> verifyInterfs;
for (const auto& [k_, value] : interfs) {
auto k = k_.withPrefix(prefix);
if (kr.contains(k)) {
ClientWorkerInterface workerInterf =
BinaryReader::fromStringRef<ClientWorkerInterface>(value, IncludeVersion());
verifyInterfs.push_back(verifyInterfaceActor(connectLock, workerInterf));
} else {
verifyInterfs.push_back(false);
}
}
wait(waitForAll(verifyInterfs));
		for (int index = 0; index < interfs.size(); index++) {
if (verifyInterfs[index].get()) {
// if we can establish a connection, add the kv pair into the result
result.push_back_deep(result.arena(),
KeyValueRef(interfs[index].key.withPrefix(prefix), interfs[index].value));
}
}
} else {
for (const auto& [k_, v] : interfs) {
auto k = k_.withPrefix(prefix);
if (kr.contains(k))
result.push_back_deep(result.arena(), KeyValueRef(k, v));
}
}
std::sort(result.begin(), result.end(), KeyValueRef::OrderByKey{});
return result;
}
WorkerInterfacesSpecialKeyImpl::WorkerInterfacesSpecialKeyImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
Future<RangeResult> WorkerInterfacesSpecialKeyImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
return workerInterfacesImplGetRangeActor(ryw, getKeyRange().begin, kr);
}
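From the client side the new behavior is opt-in per transaction: writing the verify option key (a transaction-local toggle, never committed) before reading the worker interfaces range filters the result to reachable workers. A hedged in-codebase sketch; the two special-key paths are assumptions inferred from the getManagementApiCommandOptionSpecialKey("worker_interfaces", "verify") registration above, not verified paths:

#include "fdbclient/ReadYourWrites.h"
#include "flow/actorcompiler.h" // this must be the last #include

ACTOR Future<RangeResult> getVerifiedWorkerInterfaces(Database db) {
	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
	loop {
		try {
			tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
			// assumed option path, per the registration above
			tr->set("\xff\xff/management/options/worker_interfaces/verify"_sr, ValueRef());
			// assumed module range for the registered worker interfaces
			RangeResult interfs = wait(tr->getRange(
			    KeyRangeRef("\xff\xff/worker_interfaces/"_sr, "\xff\xff/worker_interfaces0"_sr),
			    CLIENT_KNOBS->TOO_MANY));
			return interfs;
		} catch (Error& e) {
			wait(tr->onError(e));
		}
	}
}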
ACTOR Future<Void> validateSpecialSubrangeRead(ReadYourWritesTransaction* ryw,
KeySelector begin,
KeySelector end,

View File

@ -1331,6 +1331,9 @@ int64_t decodeBlobManagerEpochValue(ValueRef const& value) {
}
// blob granule data
const KeyRef blobRangeActive = LiteralStringRef("1");
const KeyRef blobRangeInactive = LiteralStringRef("0");
const KeyRangeRef blobGranuleFileKeys(LiteralStringRef("\xff\x02/bgf/"), LiteralStringRef("\xff\x02/bgf0"));
const KeyRangeRef blobGranuleMappingKeys(LiteralStringRef("\xff\x02/bgm/"), LiteralStringRef("\xff\x02/bgm0"));
const KeyRangeRef blobGranuleLockKeys(LiteralStringRef("\xff\x02/bgl/"), LiteralStringRef("\xff\x02/bgl0"));

View File

@ -144,6 +144,32 @@ ThreadFuture<Void> ThreadSafeDatabase::waitPurgeGranulesComplete(const KeyRef& p
return onMainThread([db, key]() -> Future<Void> { return db->waitPurgeGranulesComplete(key); });
}
ThreadFuture<bool> ThreadSafeDatabase::blobbifyRange(const KeyRangeRef& keyRange) {
DatabaseContext* db = this->db;
KeyRange range = keyRange;
return onMainThread([=]() -> Future<bool> { return db->blobbifyRange(range); });
}
ThreadFuture<bool> ThreadSafeDatabase::unblobbifyRange(const KeyRangeRef& keyRange) {
	DatabaseContext* db = this->db;
	KeyRange range = keyRange;
	return onMainThread([=]() -> Future<bool> { return db->unblobbifyRange(range); });
}
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> ThreadSafeDatabase::listBlobbifiedRanges(const KeyRangeRef& keyRange,
int rangeLimit) {
DatabaseContext* db = this->db;
KeyRange range = keyRange;
return onMainThread(
[=]() -> Future<Standalone<VectorRef<KeyRangeRef>>> { return db->listBlobbifiedRanges(range, rangeLimit); });
}
ThreadFuture<Version> ThreadSafeDatabase::verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) {
DatabaseContext* db = this->db;
KeyRange range = keyRange;
return onMainThread([=]() -> Future<Version> { return db->verifyBlobRange(range, version); });
}
ThreadSafeDatabase::ThreadSafeDatabase(ConnectionRecordType connectionRecordType,
std::string connectionRecordString,
int apiVersion) {
@ -359,13 +385,14 @@ ThreadFuture<Standalone<VectorRef<const char*>>> ThreadSafeTransaction::getAddre
}
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> ThreadSafeTransaction::getBlobGranuleRanges(
const KeyRangeRef& keyRange) {
const KeyRangeRef& keyRange,
int rangeLimit) {
ISingleThreadTransaction* tr = this->tr;
KeyRange r = keyRange;
return onMainThread([tr, r]() -> Future<Standalone<VectorRef<KeyRangeRef>>> {
return onMainThread([=]() -> Future<Standalone<VectorRef<KeyRangeRef>>> {
tr->checkDeferredError();
return tr->getBlobGranuleRanges(r);
return tr->getBlobGranuleRanges(r, rangeLimit);
});
}
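Every ThreadSafe* method above follows the same shape: copy the arguments, then run the body on the network thread via onMainThread, so shared state is only ever touched from one thread. A standalone sketch of that single-consumer dispatch pattern (non-void results only, for brevity; flow's real version also handles priorities and error propagation):

#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>

class MainThread {
	std::queue<std::function<void()>> q;
	std::mutex m;
	std::condition_variable cv;
	bool stopped = false;
	std::thread worker{ [this] { drain(); } }; // declared last: starts after the queue exists

	void drain() {
		std::unique_lock<std::mutex> lk(m);
		while (!stopped || !q.empty()) {
			cv.wait(lk, [this] { return stopped || !q.empty(); });
			while (!q.empty()) {
				auto task = std::move(q.front());
				q.pop();
				lk.unlock();
				task(); // user state is only ever touched on this thread
				lk.lock();
			}
		}
	}

public:
	// Queues f for the worker thread and returns a future for its result.
	template <class F>
	auto onMainThread(F f) -> std::future<decltype(f())> {
		auto p = std::make_shared<std::promise<decltype(f())>>();
		{
			std::lock_guard<std::mutex> lk(m);
			q.push([p, f] { p->set_value(f()); });
		}
		cv.notify_one();
		return p->get_future();
	}

	~MainThread() {
		{
			std::lock_guard<std::mutex> lk(m);
			stopped = true;
		}
		cv.notify_one();
		worker.join();
	}
};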

View File

@ -57,6 +57,7 @@ public:
double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is
// mostly wrong (e.g. dumping the database after a test)
double FUTURE_VERSION_RETRY_DELAY;
double GRV_ERROR_RETRY_DELAY;
double UNKNOWN_TENANT_RETRY_DELAY;
int REPLY_BYTE_LIMIT;
double DEFAULT_BACKOFF;

View File

@ -98,32 +98,44 @@ struct ClusterControllerClientInterface {
}
};
template <class T>
struct ItemWithExamples {
T item;
int count;
std::vector<std::pair<NetworkAddress, Key>> examples;
ItemWithExamples() : item{}, count(0) {}
ItemWithExamples(T const& item, int count, std::vector<std::pair<NetworkAddress, Key>> const& examples)
: item(item), count(count), examples(examples) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, item, count, examples);
}
};
struct OpenDatabaseRequest {
constexpr static FileIdentifier file_identifier = 2799502;
// Sent by the native API to the cluster controller to open a database and track client
// info changes. Returns immediately if the current client info id is different from
// knownClientInfoID; otherwise returns when it next changes (or perhaps after a long interval)
struct Samples {
int count;
// network address / trace log group
std::set<std::pair<NetworkAddress, Key>> samples;
Samples() : count(0), samples{} {}
template <typename Ar>
void serialize(Ar& ar) {
serializer(ar, count, samples);
}
// Merges a set of Samples into *this
Samples& operator+=(const Samples& other) {
count += other.count;
samples.insert(std::begin(other.samples), std::end(other.samples));
return *this;
}
};
int clientCount;
std::vector<ItemWithExamples<Key>> issues;
std::vector<ItemWithExamples<Standalone<ClientVersionRef>>> supportedVersions;
std::vector<ItemWithExamples<Key>> maxProtocolSupported;
// Maps issue to Samples
std::map<Key, Samples> issues;
// Maps ClientVersionRef to Samples
std::map<Standalone<ClientVersionRef>, Samples> supportedVersions;
// Maps max protocol to Samples
std::map<Key, Samples> maxProtocolSupported;
UID knownClientInfoID;
ReplyPromise<struct ClientDBInfo> reply;
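The map-of-Samples shape above replaces the old sorted ItemWithExamples vectors: the cluster controller can now aggregate incrementally and merge per-key tallies with operator+=. A standalone sketch of the structure, with std::string standing in for NetworkAddress / Key and an illustrative cap:

#include <set>
#include <string>
#include <utility>

struct Samples {
	static constexpr size_t kCap = 20; // stands in for CLIENT_EXAMPLE_AMOUNT
	int count = 0;
	std::set<std::pair<std::string, std::string>> samples; // address, trace log group

	void tryInsert(const std::string& addr, const std::string& group) {
		++count; // every client is counted...
		if (samples.size() < kCap)
			samples.insert({ addr, group }); // ...but only kCap are kept as examples
	}

	// Merging keeps the full count; the union of two capped sets may exceed
	// the cap, matching the operator+= above.
	Samples& operator+=(const Samples& other) {
		count += other.count;
		samples.insert(other.samples.begin(), other.samples.end());
		return *this;
	}
};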

View File

@ -378,12 +378,18 @@ public:
Future<OverlappingChangeFeedsInfo> getOverlappingChangeFeeds(KeyRangeRef ranges, Version minVersion);
Future<Void> popChangeFeedMutations(Key rangeID, Version version);
// BlobGranule API.
Future<Key> purgeBlobGranules(KeyRange keyRange,
Version purgeVersion,
Optional<TenantName> tenant,
bool force = false);
Future<Void> waitPurgeGranulesComplete(Key purgeKey);
Future<bool> blobbifyRange(KeyRange range);
Future<bool> unblobbifyRange(KeyRange range);
Future<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(KeyRange range, int rangeLimit);
Future<Version> verifyBlobRange(const KeyRange& range, Optional<Version> version);
// private:
explicit DatabaseContext(Reference<AsyncVar<Reference<IClusterConnectionRecord>>> connectionRecord,
Reference<AsyncVar<ClientDBInfo>> clientDBInfo,

View File

@ -78,7 +78,8 @@ public:
virtual ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) = 0;
virtual ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange) = 0;
virtual ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange,
int rangeLimit) = 0;
virtual ThreadResult<RangeResult> readBlobGranules(const KeyRangeRef& keyRange,
Version beginVersion,
@ -172,6 +173,13 @@ public:
virtual ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) = 0;
virtual ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) = 0;
virtual ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) = 0;
virtual ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) = 0;
virtual ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
int rangeLimit) = 0;
virtual ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) = 0;
// Interface to manage shared state across multiple connections to the same Database
virtual ThreadFuture<DatabaseSharedState*> createSharedState() = 0;
virtual void setSharedState(DatabaseSharedState* p) = 0;

View File

@ -55,7 +55,7 @@ public:
Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(KeyRange const& range, int64_t chunkSize) override {
throw client_invalid_operation();
}
Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(KeyRange const& range) override {
Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(KeyRange const& range, int rangeLimit) override {
throw client_invalid_operation();
}
Future<Standalone<VectorRef<BlobGranuleChunkRef>>> readBlobGranules(KeyRange const& range,

View File

@ -80,7 +80,7 @@ public:
virtual Future<Standalone<VectorRef<const char*>>> getAddressesForKey(Key const& key) = 0;
virtual Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(KeyRange const& range, int64_t chunkSize) = 0;
virtual Future<int64_t> getEstimatedRangeSizeBytes(KeyRange const& keys) = 0;
virtual Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(KeyRange const& range) = 0;
virtual Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(KeyRange const& range, int rangeLimit) = 0;
virtual Future<Standalone<VectorRef<BlobGranuleChunkRef>>> readBlobGranules(KeyRange const& range,
Version begin,
Optional<Version> readVersion,

View File

@ -171,6 +171,32 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
uint8_t const* purge_key_name,
int purge_key_name_length);
FDBFuture* (*databaseBlobbifyRange)(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length);
FDBFuture* (*databaseUnblobbifyRange)(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length);
FDBFuture* (*databaseListBlobbifiedRanges)(FDBDatabase* db,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int rangeLimit);
	FDBFuture* (*databaseVerifyBlobRange)(FDBDatabase* db,
	                                      uint8_t const* begin_key_name,
	                                      int begin_key_name_length,
	                                      uint8_t const* end_key_name,
	                                      int end_key_name_length,
	                                      int64_t version);
// Tenant
fdb_error_t (*tenantCreateTransaction)(FDBTenant* tenant, FDBTransaction** outTransaction);
@ -276,7 +302,8 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length);
int end_key_name_length,
int rangeLimit);
FDBResult* (*transactionReadBlobGranules)(FDBTransaction* db,
uint8_t const* begin_key_name,
@ -376,7 +403,8 @@ public:
ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override;
ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange,
int rangeLimit) override;
ThreadResult<RangeResult> readBlobGranules(const KeyRangeRef& keyRange,
Version beginVersion,
@ -476,6 +504,12 @@ public:
ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) override;
ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) override;
ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) override;
ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
int rangeLimit) override;
ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) override;
ThreadFuture<DatabaseSharedState*> createSharedState() override;
void setSharedState(DatabaseSharedState* p) override;
@ -574,7 +608,8 @@ public:
ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange,
int rangeLimit) override;
ThreadResult<RangeResult> readBlobGranules(const KeyRangeRef& keyRange,
Version beginVersion,
@ -817,6 +852,12 @@ public:
ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) override;
ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) override;
ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) override;
ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
int rangeLimit) override;
ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) override;
ThreadFuture<DatabaseSharedState*> createSharedState() override;
void setSharedState(DatabaseSharedState* p) override;

View File

@ -415,7 +415,7 @@ public:
// The returned list would still be in form of [keys.begin, splitPoint1, splitPoint2, ... , keys.end]
Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(KeyRange const& keys, int64_t chunkSize);
Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRange& range);
Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRange& range, int rangeLimit);
Future<Standalone<VectorRef<BlobGranuleChunkRef>>> readBlobGranules(const KeyRange& range,
Version begin,
Optional<Version> readVersion,

View File

@ -121,7 +121,7 @@ public:
Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) override;
Future<int64_t> getEstimatedRangeSizeBytes(const KeyRange& keys) override;
Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRange& range) override;
Future<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRange& range, int rangeLimit) override;
Future<Standalone<VectorRef<BlobGranuleChunkRef>>> readBlobGranules(const KeyRange& range,
Version begin,
Optional<Version> readVersion,

View File

@ -548,6 +548,15 @@ public:
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
};
class WorkerInterfacesSpecialKeyImpl : public SpecialKeyRangeReadImpl {
public:
explicit WorkerInterfacesSpecialKeyImpl(KeyRangeRef kr);
Future<RangeResult> getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const override;
};
// If the underlying set of key-value pairs of a key space is not changing, then we expect repeating a read to give the
// same result. Additionally, we can generate the expected result of any read if that read is reading a subrange. This
// actor performs a read of an arbitrary subrange of [begin, end) and validates the results.

View File

@ -594,6 +594,8 @@ const Value blobManagerEpochValueFor(int64_t epoch);
int64_t decodeBlobManagerEpochValue(ValueRef const& value);
// blob granule keys
extern const StringRef blobRangeActive;
extern const StringRef blobRangeInactive;
extern const uint8_t BG_FILE_TYPE_DELTA;
extern const uint8_t BG_FILE_TYPE_SNAPSHOT;

View File

@ -62,6 +62,13 @@ public:
ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) override;
ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) override;
ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) override;
ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
int rangeLimit) override;
ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) override;
ThreadFuture<DatabaseSharedState*> createSharedState() override;
void setSharedState(DatabaseSharedState* p) override;
@ -149,7 +156,8 @@ public:
ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange) override;
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> getBlobGranuleRanges(const KeyRangeRef& keyRange,
int rangeLimit) override;
ThreadResult<RangeResult> readBlobGranules(const KeyRangeRef& keyRange,
Version beginVersion,

View File

@ -453,20 +453,30 @@ struct BackupData {
ACTOR static Future<Version> _getMinKnownCommittedVersion(BackupData* self) {
state Span span("BA:GetMinCommittedVersion"_loc);
loop {
GetReadVersionRequest request(span.context,
0,
TransactionPriority::DEFAULT,
invalidVersion,
GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION);
choose {
when(wait(self->cx->onProxiesChanged())) {}
when(GetReadVersionReply reply =
wait(basicLoadBalance(self->cx->getGrvProxies(UseProvisionalProxies::False),
&GrvProxyInterface::getConsistentReadVersion,
request,
self->cx->taskID))) {
self->cx->ssVersionVectorCache.applyDelta(reply.ssVersionVectorDelta);
return reply.version;
try {
GetReadVersionRequest request(span.context,
0,
TransactionPriority::DEFAULT,
invalidVersion,
GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION);
choose {
when(wait(self->cx->onProxiesChanged())) {}
when(GetReadVersionReply reply =
wait(basicLoadBalance(self->cx->getGrvProxies(UseProvisionalProxies::False),
&GrvProxyInterface::getConsistentReadVersion,
request,
self->cx->taskID))) {
self->cx->ssVersionVectorCache.applyDelta(reply.ssVersionVectorDelta);
return reply.version;
}
}
} catch (Error& e) {
if (e.code() == error_code_batch_transaction_throttled ||
e.code() == error_code_grv_proxy_memory_limit_exceeded) {
// GRV Proxy returns an error
wait(delayJittered(CLIENT_KNOBS->GRV_ERROR_RETRY_DELAY));
} else {
throw;
}
}
}

View File

@ -189,7 +189,7 @@ ACTOR Future<Void> clearAndAwaitMerge(Database cx, KeyRange range) {
state int reClearInterval = 1; // do quadratic backoff on clear rate, b/c large keys can keep it not write-cold
loop {
try {
Standalone<VectorRef<KeyRangeRef>> ranges = wait(tr.getBlobGranuleRanges(range));
Standalone<VectorRef<KeyRangeRef>> ranges = wait(tr.getBlobGranuleRanges(range, 2));
if (ranges.size() == 1) {
return Void();
}

View File

@ -130,7 +130,7 @@ void updateClientBlobRanges(KeyRangeMap<bool>* knownBlobRanges,
}
break;
}
bool active = dbBlobRanges[i].value == LiteralStringRef("1");
bool active = dbBlobRanges[i].value == blobRangeActive;
if (active) {
if (BM_DEBUG) {
fmt::print("BM sees client range [{0} - {1})\n",
@ -1017,6 +1017,8 @@ static bool handleRangeIsAssign(Reference<BlobManagerData> bmData, RangeAssignme
}
ASSERT(count == 1);
bool forcePurging = bmData->isForcePurging(assignment.keyRange);
if (assignment.worker.present() && assignment.worker.get().isValid()) {
if (BM_DEBUG) {
fmt::print("BW {0} already chosen for seqno {1} in BM {2}\n",
@ -1034,8 +1036,10 @@ static bool handleRangeIsAssign(Reference<BlobManagerData> bmData, RangeAssignme
// assignsInProgress
bmData->addActor.send(doRangeAssignment(bmData, assignment, workerId, bmData->epoch, seqNo));
} else {
bmData->assignsInProgress.insert(assignment.keyRange,
doRangeAssignment(bmData, assignment, workerId, bmData->epoch, seqNo));
if (!forcePurging) {
bmData->assignsInProgress.insert(assignment.keyRange,
doRangeAssignment(bmData, assignment, workerId, bmData->epoch, seqNo));
}
if (bmData->workerStats.count(workerId)) {
bmData->workerStats[workerId].numGranulesAssigned += 1;
}
@ -1044,8 +1048,10 @@ static bool handleRangeIsAssign(Reference<BlobManagerData> bmData, RangeAssignme
// Ensure the key boundaries are updated before we pick a worker
bmData->workerAssignments.insert(assignment.keyRange, UID());
ASSERT(assignment.assign.get().type != AssignRequestType::Continue);
bmData->assignsInProgress.insert(assignment.keyRange,
doRangeAssignment(bmData, assignment, Optional<UID>(), bmData->epoch, seqNo));
if (!forcePurging) {
bmData->assignsInProgress.insert(
assignment.keyRange, doRangeAssignment(bmData, assignment, Optional<UID>(), bmData->epoch, seqNo));
}
}
return true;
}
@ -1094,10 +1100,6 @@ static bool handleRangeIsRevoke(Reference<BlobManagerData> bmData, RangeAssignme
}
static bool handleRangeAssign(Reference<BlobManagerData> bmData, RangeAssignment assignment) {
if ((assignment.isAssign || !assignment.revoke.get().dispose) && bmData->isForcePurging(assignment.keyRange)) {
return false;
}
int64_t seqNo = bmData->seqNo;
bmData->seqNo++;
@ -1280,7 +1282,7 @@ ACTOR Future<Void> monitorClientRanges(Reference<BlobManagerData> bmData) {
needToCoalesce = false;
for (int i = 0; i < results.size() - 1; i++) {
bool active = results[i].value == LiteralStringRef("1");
bool active = results[i].value == blobRangeActive;
bmData->knownBlobRanges.insert(KeyRangeRef(results[i].key, results[i + 1].key), active);
}
}
@ -1633,6 +1635,10 @@ ACTOR Future<Void> maybeSplitRange(Reference<BlobManagerData> bmData,
bool writeHot,
int64_t originalEpoch,
int64_t originalSeqno) {
if (bmData->isForcePurging(granuleRange)) {
// ignore
return Void();
}
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(bmData->db);
// first get ranges to split
@ -1952,7 +1958,10 @@ ACTOR Future<Void> maybeSplitRange(Reference<BlobManagerData> bmData,
// read mapping from db to handle any in flight granules or other issues
// Forces all granules in the specified key range to flush data to blob up to the specified version. This is required
// for executing a merge.
ACTOR Future<Void> forceGranuleFlush(Reference<BlobManagerData> bmData, KeyRange keyRange, Version version) {
ACTOR Future<bool> forceGranuleFlush(Reference<BlobManagerData> bmData,
UID mergeGranuleID,
KeyRange keyRange,
Version version) {
state Transaction tr(bmData->db);
state KeyRange currentRange = keyRange;
@ -1975,7 +1984,13 @@ ACTOR Future<Void> forceGranuleFlush(Reference<BlobManagerData> bmData, KeyRange
TraceEvent("GranuleFlushCancelledForcePurge", bmData->id)
.detail("Epoch", bmData->epoch)
.detail("KeyRange", keyRange);
return Void();
// destroy already created change feed from earlier so it doesn't leak
wait(updateChangeFeed(
&tr, granuleIDToCFKey(mergeGranuleID), ChangeFeedStatus::CHANGE_FEED_DESTROY, keyRange));
wait(tr.commit());
return false;
}
// TODO KNOB
@ -2091,7 +2106,7 @@ ACTOR Future<Void> forceGranuleFlush(Reference<BlobManagerData> bmData, KeyRange
version);
}
return Void();
return true;
}
// Persist the merge intent for this merge in the database. Once this transaction commits, the merge is in progress. It
@ -2126,6 +2141,9 @@ ACTOR Future<std::pair<UID, Version>> persistMergeGranulesStart(Reference<BlobMa
wait(tr->commit());
bmData->activeGranuleMerges.insert(mergeRange, invalidVersion);
bmData->activeGranuleMerges.coalesce(mergeRange.begin);
// TODO better error?
return std::pair(UID(), invalidVersion);
}
@ -2174,7 +2192,7 @@ ACTOR Future<std::pair<UID, Version>> persistMergeGranulesStart(Reference<BlobMa
// FIXME: why not just make parentGranuleRanges vector of N+1 keys?
// Persists the merge being complete in the database by clearing the merge intent. Once this transaction commits, the
// merge is considered completed.
ACTOR Future<Void> persistMergeGranulesDone(Reference<BlobManagerData> bmData,
ACTOR Future<bool> persistMergeGranulesDone(Reference<BlobManagerData> bmData,
UID mergeGranuleID,
KeyRange mergeRange,
Version mergeVersion,
@ -2219,9 +2237,14 @@ ACTOR Future<Void> persistMergeGranulesDone(Reference<BlobManagerData> bmData,
ChangeFeedStatus::CHANGE_FEED_DESTROY,
mergeRange));
// TODO could also delete history entry here
wait(tr->commit());
return Void();
bmData->activeGranuleMerges.insert(mergeRange, invalidVersion);
bmData->activeGranuleMerges.coalesce(mergeRange.begin);
return false;
// TODO: check this in split re-eval too once that is merged!!
}
@ -2298,7 +2321,7 @@ ACTOR Future<Void> persistMergeGranulesDone(Reference<BlobManagerData> bmData,
tr->getCommittedVersion());
}
CODE_PROBE(true, "Granule merge complete");
return Void();
return true;
} catch (Error& e) {
wait(tr->onError(e));
}
@ -2328,16 +2351,28 @@ ACTOR Future<Void> finishMergeGranules(Reference<BlobManagerData> bmData,
}
// force granules to persist state up to mergeVersion
wait(forceGranuleFlush(bmData, mergeRange, mergeVersion));
bool successFlush = wait(forceGranuleFlush(bmData, mergeGranuleID, mergeRange, mergeVersion));
if (!successFlush) {
bmData->activeGranuleMerges.insert(mergeRange, invalidVersion);
bmData->activeGranuleMerges.coalesce(mergeRange.begin);
--bmData->stats.activeMerges;
return Void();
}
// update state and clear merge intent
wait(persistMergeGranulesDone(bmData,
mergeGranuleID,
mergeRange,
mergeVersion,
parentGranuleIDs,
parentGranuleRanges,
parentGranuleStartVersions));
bool successFinish = wait(persistMergeGranulesDone(bmData,
mergeGranuleID,
mergeRange,
mergeVersion,
parentGranuleIDs,
parentGranuleRanges,
parentGranuleStartVersions));
if (!successFinish) {
bmData->activeGranuleMerges.insert(mergeRange, invalidVersion);
bmData->activeGranuleMerges.coalesce(mergeRange.begin);
--bmData->stats.activeMerges;
return Void();
}
int64_t seqnoForEval = bmData->seqNo;
@ -2387,6 +2422,7 @@ ACTOR Future<Void> doMerge(Reference<BlobManagerData> bmData,
wait(persistMergeGranulesStart(bmData, mergeRange, ids, ranges, startVersions));
if (persistMerge.second == invalidVersion) {
// cancelled because of force purge
return Void();
}
wait(finishMergeGranules(
@ -2427,6 +2463,11 @@ static void attemptStartMerge(Reference<BlobManagerData> bmData,
}
}
if (bmData->isForcePurging(mergeRange)) {
// ignore
return;
}
if (BM_DEBUG) {
fmt::print("BM {0} Starting merge of [{1} - {2}) ({3})\n",
bmData->epoch,
@ -2851,7 +2892,7 @@ ACTOR Future<Void> monitorBlobWorkerStatus(Reference<BlobManagerData> bmData, Bl
if (rep.blockedVersion < inProgressMergeVersion) {
CODE_PROBE(true, "merge blocking re-snapshot");
if (BM_DEBUG) {
fmt::print("DBG: BM {0} MERGE @ {1} blocking re-snapshot [{2} - {3}) @ {4}, "
fmt::print("BM {0} MERGE @ {1} blocking re-snapshot [{2} - {3}) @ {4}, "
"continuing snapshot\n",
bmData->epoch,
inProgressMergeVersion,
@ -4069,9 +4110,9 @@ ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self,
self->epoch,
granuleId.toString(),
filesToDelete.size());
for (auto filename : filesToDelete) {
fmt::print(" - {}\n", filename.c_str());
}
/*for (auto filename : filesToDelete) {
fmt::print(" - {}\n", filename.c_str());
}*/
}
// delete the files before the corresponding metadata.
@ -4203,9 +4244,9 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self,
self->epoch,
granuleId.toString(),
filesToDelete.size());
for (auto filename : filesToDelete) {
fmt::print(" - {0}\n", filename);
}
/*for (auto filename : filesToDelete) {
fmt::print(" - {0}\n", filename);
}*/
}
// TODO: the following comment relies on the assumption that BWs will not get requests to
@ -4323,6 +4364,25 @@ ACTOR Future<Void> purgeRange(Reference<BlobManagerData> self, KeyRangeRef range
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
}
// wait for all active splits and merges in the range to come to a stop, so no races with purging
std::vector<Future<Void>> activeBoundaryEvals;
auto boundaries = self->boundaryEvaluations.intersectingRanges(range);
for (auto& it : boundaries) {
auto& f = it.cvalue().inProgress;
if (f.isValid() && !f.isReady() && !f.isError()) {
activeBoundaryEvals.push_back(f);
}
}
if (!activeBoundaryEvals.empty()) {
wait(waitForAll(activeBoundaryEvals));
}
// some merges aren't counted in boundary evals, for merge/split race reasons
while (self->isMergeActive(range)) {
wait(delayJittered(1.0));
}
auto ranges = self->workerAssignments.intersectingRanges(range);
state std::vector<KeyRange> activeRanges;
@ -5013,9 +5073,6 @@ TEST_CASE("/blobmanager/updateranges") {
VectorRef<KeyRangeRef> added;
VectorRef<KeyRangeRef> removed;
StringRef active = LiteralStringRef("1");
StringRef inactive = StringRef();
RangeResult dbDataEmpty;
std::vector<std::pair<KeyRangeRef, bool>> kbrRanges;
@ -5026,34 +5083,34 @@ TEST_CASE("/blobmanager/updateranges") {
// db data setup
RangeResult dbDataAB;
dbDataAB.emplace_back(ar, keyA, active);
dbDataAB.emplace_back(ar, keyB, inactive);
dbDataAB.emplace_back(ar, keyA, blobRangeActive);
dbDataAB.emplace_back(ar, keyB, blobRangeInactive);
RangeResult dbDataAC;
dbDataAC.emplace_back(ar, keyA, active);
dbDataAC.emplace_back(ar, keyC, inactive);
dbDataAC.emplace_back(ar, keyA, blobRangeActive);
dbDataAC.emplace_back(ar, keyC, blobRangeInactive);
RangeResult dbDataAD;
dbDataAD.emplace_back(ar, keyA, active);
dbDataAD.emplace_back(ar, keyD, inactive);
dbDataAD.emplace_back(ar, keyA, blobRangeActive);
dbDataAD.emplace_back(ar, keyD, blobRangeInactive);
RangeResult dbDataBC;
dbDataBC.emplace_back(ar, keyB, active);
dbDataBC.emplace_back(ar, keyC, inactive);
dbDataBC.emplace_back(ar, keyB, blobRangeActive);
dbDataBC.emplace_back(ar, keyC, blobRangeInactive);
RangeResult dbDataBD;
dbDataBD.emplace_back(ar, keyB, active);
dbDataBD.emplace_back(ar, keyD, inactive);
dbDataBD.emplace_back(ar, keyB, blobRangeActive);
dbDataBD.emplace_back(ar, keyD, blobRangeInactive);
RangeResult dbDataCD;
dbDataCD.emplace_back(ar, keyC, active);
dbDataCD.emplace_back(ar, keyD, inactive);
dbDataCD.emplace_back(ar, keyC, blobRangeActive);
dbDataCD.emplace_back(ar, keyD, blobRangeInactive);
RangeResult dbDataAB_CD;
dbDataAB_CD.emplace_back(ar, keyA, active);
dbDataAB_CD.emplace_back(ar, keyB, inactive);
dbDataAB_CD.emplace_back(ar, keyC, active);
dbDataAB_CD.emplace_back(ar, keyD, inactive);
dbDataAB_CD.emplace_back(ar, keyA, blobRangeActive);
dbDataAB_CD.emplace_back(ar, keyB, blobRangeInactive);
dbDataAB_CD.emplace_back(ar, keyC, blobRangeActive);
dbDataAB_CD.emplace_back(ar, keyD, blobRangeInactive);
// key ranges setup
KeyRangeRef rangeAB = KeyRangeRef(keyA, keyB);

View File

@ -395,6 +395,21 @@ class DDTxnProcessorImpl {
}
}
}
ACTOR static Future<Void> pollMoveKeysLock(Database cx, MoveKeysLock lock, const DDEnabledState* ddEnabledState) {
loop {
wait(delay(SERVER_KNOBS->MOVEKEYS_LOCK_POLLING_DELAY));
state Transaction tr(cx);
loop {
try {
wait(checkMoveKeysLockReadOnly(&tr, lock, ddEnabledState));
break;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
}
};
Future<IDDTxnProcessor::SourceServers> DDTxnProcessor::getSourceServersForRange(const KeyRangeRef range) {
@ -431,3 +446,7 @@ Future<Reference<InitialDataDistribution>> DDTxnProcessor::getInitialDataDistrib
Future<Void> DDTxnProcessor::waitForDataDistributionEnabled(const DDEnabledState* ddEnabledState) const {
return DDTxnProcessorImpl::waitForDataDistributionEnabled(cx, ddEnabledState);
}
Future<Void> DDTxnProcessor::pollMoveKeysLock(MoveKeysLock lock, const DDEnabledState* ddEnabledState) const {
return DDTxnProcessorImpl::pollMoveKeysLock(cx, lock, ddEnabledState);
}
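Hoisting pollMoveKeysLock into DDTxnProcessorImpl continues the pattern of this file: every helper that opens a Transaction moves behind the IDDTxnProcessor boundary so the data distributor itself stays database-agnostic and mockable. A standalone sketch of that extraction pattern, with illustrative names:

#include <memory>

struct ITxnProcessor {
	virtual ~ITxnProcessor() = default;
	virtual void pollMoveKeysLock() = 0; // periodically re-validates ownership of the lock
};

struct TxnProcessor : ITxnProcessor {
	void pollMoveKeysLock() override { /* real transaction loop against the cluster */ }
};

struct MockTxnProcessor : ITxnProcessor {
	void pollMoveKeysLock() override { /* test double: the lock is never lost */ }
};

struct Distributor {
	std::unique_ptr<ITxnProcessor> txnProcessor;
	void run() { txnProcessor->pollMoveKeysLock(); } // no direct Database use here
};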

View File

@ -284,21 +284,6 @@ static std::set<int> const& normalDDQueueErrors() {
return s;
}
ACTOR Future<Void> pollMoveKeysLock(Database cx, MoveKeysLock lock, const DDEnabledState* ddEnabledState) {
loop {
wait(delay(SERVER_KNOBS->MOVEKEYS_LOCK_POLLING_DELAY));
state Transaction tr(cx);
loop {
try {
wait(checkMoveKeysLockReadOnly(&tr, lock, ddEnabledState));
break;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
}
struct DataDistributor : NonCopyable, ReferenceCounted<DataDistributor> {
public:
Reference<AsyncVar<ServerDBInfo> const> dbInfo;
@ -543,6 +528,10 @@ public:
Future<Void> shardsReady = resumeFromShards(Reference<DataDistributor>::addRef(this), g_network->isSimulated());
return resumeFromDataMoves(Reference<DataDistributor>::addRef(this), shardsReady);
}
Future<Void> pollMoveKeysLock(const DDEnabledState* ddEnabledState) {
return txnProcessor->pollMoveKeysLock(lock, ddEnabledState);
}
};
// Runs the data distribution algorithm for FDB, including the DD Queue, DD tracker, and DD team collection
@ -618,7 +607,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
ddTenantCache->monitorTenantMap(), "DDTenantCacheMonitor", self->ddId, &normalDDQueueErrors()));
}
actors.push_back(pollMoveKeysLock(cx, self->lock, ddEnabledState));
actors.push_back(self->pollMoveKeysLock(ddEnabledState));
actors.push_back(reportErrorsExcept(dataDistributionTracker(self->initData,
cx,
self->relocationProducer,

View File

@ -71,7 +71,7 @@ ACTOR Future<Void> updateMaxShardSize(Reference<AsyncVar<int64_t>> dbSizeEstimat
struct DataDistributionTracker {
Database cx;
UID distributorId;
KeyRangeMap<ShardTrackedData>& shards;
KeyRangeMap<ShardTrackedData>* shards;
ActorCollection sizeChanges;
int64_t systemSizeEstimate;
@ -92,7 +92,7 @@ struct DataDistributionTracker {
// The pointer to trackerCancelled must be extracted by actors,
// because by the time trackerCancelled == true, this memory can
// no longer be accessed
bool& trackerCancelled;
bool* trackerCancelled;
// This class extracts the trackerCancelled reference from a DataDistributionTracker object
// Because some actors spawned by the dataDistributionTracker outlive the DataDistributionTracker
@ -104,7 +104,7 @@ struct DataDistributionTracker {
public:
SafeAccessor(DataDistributionTracker* tracker)
: trackerCancelled(tracker->trackerCancelled), tracker(*tracker) {
: trackerCancelled(*tracker->trackerCancelled), tracker(*tracker) {
ASSERT(!trackerCancelled);
}
@ -123,15 +123,15 @@ struct DataDistributionTracker {
PromiseStream<RelocateShard> const& output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
Reference<AsyncVar<bool>> anyZeroHealthyTeams,
KeyRangeMap<ShardTrackedData>& shards,
bool& trackerCancelled)
KeyRangeMap<ShardTrackedData>* shards,
bool* trackerCancelled)
: cx(cx), distributorId(distributorId), shards(shards), sizeChanges(false), systemSizeEstimate(0),
dbSizeEstimate(new AsyncVar<int64_t>()), maxShardSize(new AsyncVar<Optional<int64_t>>()), output(output),
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), readyToStart(readyToStart),
anyZeroHealthyTeams(anyZeroHealthyTeams), trackerCancelled(trackerCancelled) {}
~DataDistributionTracker() {
trackerCancelled = true;
*trackerCancelled = true;
// Cancel all actors so they aren't waiting on sizeChanged broken promise
sizeChanges.clear(false);
}
@ -399,7 +399,7 @@ ACTOR Future<int64_t> getFirstSize(Reference<AsyncVar<Optional<ShardMetrics>>> s
ACTOR Future<Void> changeSizes(DataDistributionTracker* self, KeyRange keys, int64_t oldShardsEndingSize) {
state std::vector<Future<int64_t>> sizes;
state std::vector<Future<int64_t>> systemSizes;
for (auto it : self->shards.intersectingRanges(keys)) {
for (auto it : self->shards->intersectingRanges(keys)) {
Future<int64_t> thisSize = getFirstSize(it->value().stats);
sizes.push_back(thisSize);
if (it->range().begin >= systemKeys.begin) {
@ -557,8 +557,8 @@ Future<Void> shardMerger(DataDistributionTracker* self,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize) {
int64_t maxShardSize = self->maxShardSize->get().get();
auto prevIter = self->shards.rangeContaining(keys.begin);
auto nextIter = self->shards.rangeContaining(keys.begin);
auto prevIter = self->shards->rangeContaining(keys.begin);
auto nextIter = self->shards->rangeContaining(keys.begin);
CODE_PROBE(true, "shard to be merged");
ASSERT(keys.begin > allKeys.begin);
@ -779,7 +779,7 @@ ACTOR Future<Void> shardTracker(DataDistributionTracker::SafeAccessor self,
}
void restartShardTrackers(DataDistributionTracker* self, KeyRangeRef keys, Optional<ShardMetrics> startingMetrics) {
auto ranges = self->shards.getAffectedRangesAfterInsertion(keys, ShardTrackedData());
auto ranges = self->shards->getAffectedRangesAfterInsertion(keys, ShardTrackedData());
for (int i = 0; i < ranges.size(); i++) {
if (!ranges[i].value.trackShard.isValid() && ranges[i].begin != keys.begin) {
// When starting, key space will be full of "dummy" default constructed entries.
@ -806,7 +806,7 @@ void restartShardTrackers(DataDistributionTracker* self, KeyRangeRef keys, Optio
data.stats = shardMetrics;
data.trackShard = shardTracker(DataDistributionTracker::SafeAccessor(self), ranges[i], shardMetrics);
data.trackBytes = trackShardMetrics(DataDistributionTracker::SafeAccessor(self), ranges[i], shardMetrics);
self->shards.insert(ranges[i], data);
self->shards->insert(ranges[i], data);
}
}
@ -848,7 +848,7 @@ ACTOR Future<Void> fetchTopKShardMetrics_impl(DataDistributionTracker* self, Get
for (i = 0; i < SERVER_KNOBS->DD_SHARD_COMPARE_LIMIT && i < req.keys.size(); ++i) {
auto range = req.keys[i];
StorageMetrics metrics;
for (auto t : self->shards.intersectingRanges(range)) {
for (auto t : self->shards->intersectingRanges(range)) {
auto& stats = t.value().stats;
if (!stats->get().present()) {
onChange = stats->onChange();
@ -914,7 +914,7 @@ ACTOR Future<Void> fetchShardMetrics_impl(DataDistributionTracker* self, GetMetr
loop {
Future<Void> onChange;
StorageMetrics returnMetrics;
for (auto t : self->shards.intersectingRanges(req.keys)) {
for (auto t : self->shards->intersectingRanges(req.keys)) {
auto& stats = t.value().stats;
if (!stats->get().present()) {
onChange = stats->onChange();
@ -958,8 +958,8 @@ ACTOR Future<Void> fetchShardMetricsList_impl(DataDistributionTracker* self, Get
// list of metrics, regenerate on loop when full range unsuccessful
Standalone<VectorRef<DDMetricsRef>> result;
Future<Void> onChange;
auto beginIter = self->shards.containedRanges(req.keys).begin();
auto endIter = self->shards.intersectingRanges(req.keys).end();
auto beginIter = self->shards->containedRanges(req.keys).begin();
auto endIter = self->shards->intersectingRanges(req.keys).end();
for (auto t = beginIter; t != endIter; ++t) {
auto& stats = t.value().stats;
if (!stats->get().present()) {
@ -1015,8 +1015,8 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
output,
shardsAffectedByTeamFailure,
anyZeroHealthyTeams,
*shards,
*trackerCancelled);
shards,
trackerCancelled);
state Future<Void> loggingTrigger = Void();
state Future<Void> readHotDetect = readHotDetector(&self);
state Reference<EventCacheHolder> ddTrackerStatsEventHolder = makeReference<EventCacheHolder>("DDTrackerStats");
@ -1030,7 +1030,7 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
}
when(wait(loggingTrigger)) {
TraceEvent("DDTrackerStats", self.distributorId)
.detail("Shards", self.shards.size())
.detail("Shards", self.shards->size())
.detail("TotalSizeBytes", self.dbSizeEstimate->get())
.detail("SystemSizeBytes", self.systemSizeEstimate)
.trackLatest(ddTrackerStatsEventHolder->trackingKey);
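
Context for this hunk: `shards` and `trackerCancelled` previously bound references to stack state owned by the `dataDistributionTracker` actor; passing pointers and asserting through `SafeAccessor` makes any use-after-cancel fail loudly instead of silently reading freed state. A standalone sketch of the guard pattern (illustrative names, not the real FDB types):

#include <cassert>

// Illustrative stand-in for the SafeAccessor pattern: the guard binds to
// the owner's cancellation flag and re-checks it on every access, so a
// use-after-cancel trips an assertion instead of touching dead state.
struct Tracker {
    bool cancelled = false; // set to true by the owner's destructor in the real code
    int shardCount = 0;
};

class SafeAccessor {
    bool const& cancelled;
    Tracker& tracker;

public:
    explicit SafeAccessor(Tracker* t) : cancelled(t->cancelled), tracker(*t) {
        assert(!cancelled); // constructing an accessor for a cancelled tracker is a bug
    }
    Tracker* operator->() {
        assert(!cancelled); // every dereference re-validates liveness
        return &tracker;
    }
};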

View File

@ -18,7 +18,9 @@
* limitations under the License.
*/
#include "fdbserver/EncryptKeyProxyInterface.h"
#include "fdbserver/GetEncryptCipherKeys.h"
#include "flow/IRandom.h"
#include <boost/functional/hash.hpp>
@ -105,8 +107,12 @@ ACTOR Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>
for (const EKPBaseCipherDetails& details : reply.baseCipherDetails) {
EncryptCipherDomainId domainId = details.encryptDomainId;
if (domains.count(domainId) > 0 && cipherKeys.count(domainId) == 0) {
Reference<BlobCipherKey> cipherKey = cipherKeyCache->insertCipherKey(
domainId, details.baseCipherId, details.baseCipherKey.begin(), details.baseCipherKey.size());
Reference<BlobCipherKey> cipherKey = cipherKeyCache->insertCipherKey(domainId,
details.baseCipherId,
details.baseCipherKey.begin(),
details.baseCipherKey.size(),
details.refreshAt,
details.expireAt);
ASSERT(cipherKey.isValid());
cipherKeys[domainId] = cipherKey;
}
@ -191,10 +197,10 @@ ACTOR Future<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> ge
// Fetch any uncached cipher keys.
loop choose {
when(EKPGetBaseCipherKeysByIdsReply reply = wait(getUncachedEncryptCipherKeys(db, request))) {
std::unordered_map<BaseCipherIndex, StringRef, boost::hash<BaseCipherIndex>> baseCipherKeys;
std::unordered_map<BaseCipherIndex, EKPBaseCipherDetails, boost::hash<BaseCipherIndex>> baseCipherKeys;
for (const EKPBaseCipherDetails& baseDetails : reply.baseCipherDetails) {
BaseCipherIndex baseIdx = std::make_pair(baseDetails.encryptDomainId, baseDetails.baseCipherId);
baseCipherKeys[baseIdx] = baseDetails.baseCipherKey;
baseCipherKeys[baseIdx] = baseDetails;
}
// Insert base cipher keys into cache and construct result.
for (const BlobCipherDetails& details : cipherDetails) {
@ -211,9 +217,11 @@ ACTOR Future<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> ge
}
Reference<BlobCipherKey> cipherKey = cipherKeyCache->insertCipherKey(details.encryptDomainId,
details.baseCipherId,
itr->second.begin(),
itr->second.size(),
details.salt);
itr->second.baseCipherKey.begin(),
itr->second.baseCipherKey.size(),
details.salt,
itr->second.refreshAt,
itr->second.expireAt);
ASSERT(cipherKey.isValid());
cipherKeys[details] = cipherKey;
}
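
The index map's value type widens from StringRef to the full EKPBaseCipherDetails because the cache insert now also needs the refresh/expire timestamps; keeping only the raw key bytes would drop the TTLs. An illustrative stand-in for the indexing (not the real EKP types):

#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>

#include <boost/functional/hash.hpp>

// Illustrative stand-in for EKPBaseCipherDetails: key bytes plus the two
// TTL timestamps the cache insert now consumes.
struct BaseCipherDetailsLite {
    std::string baseCipherKey;
    int64_t refreshAt = 0;
    int64_t expireAt = 0;
};

// {encryptDomainId, baseCipherId} uniquely identifies a base cipher.
using BaseCipherIndex = std::pair<int64_t, uint64_t>;
using BaseCipherMap =
    std::unordered_map<BaseCipherIndex, BaseCipherDetailsLite, boost::hash<BaseCipherIndex>>;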

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/ClientKnobs.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/Notified.h"
@ -31,6 +32,7 @@
#include "fdbserver/WaitFailure.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbrpc/sim_validation.h"
#include "flow/IRandom.h"
#include "flow/flow.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -560,7 +562,9 @@ ACTOR Future<Void> queueGetReadVersionRequests(
// WARNING: this code is run at a high priority, so it needs to do as little work as possible
bool canBeQueued = true;
if (stats->txnRequestIn.getValue() - stats->txnRequestOut.getValue() >
SERVER_KNOBS->START_TRANSACTION_MAX_QUEUE_SIZE) {
SERVER_KNOBS->START_TRANSACTION_MAX_QUEUE_SIZE ||
(g_network->isSimulated() && !g_simulator.speedUpSimulation &&
deterministicRandom()->random01() < 0.01)) {
// When the limit is hit (or a simulated rejection fires), try to drop requests from the lower priority queues.
if (req.priority == TransactionPriority::BATCH) {
canBeQueued = false;
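
In simulation this injects a ~1% random rejection on top of the real queue-size limit, so clients exercise their grv_proxy_memory_limit_exceeded retry paths (several workloads later in this diff gain retry loops for exactly that error). A standalone sketch of the predicate, with the simulator flags modeled as plain booleans:

#include <cstdint>
#include <random>

// Model of the rejection predicate: the queue-size limit is the real
// trigger; in simulation (before speed-up) a 1% coin flip is OR'ed in so
// the rejection path is exercised even on idle test clusters. The two
// booleans stand in for g_network->isSimulated() and
// g_simulator.speedUpSimulation.
bool shouldRejectGrv(int64_t queued, int64_t maxQueue, bool simulated, bool speedUp, std::mt19937& rng) {
    std::uniform_real_distribution<double> u01(0.0, 1.0);
    return queued > maxQueue || (simulated && !speedUp && u01(rng) < 0.01);
}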

View File

@ -76,18 +76,20 @@ struct SimKmsConnectorContext : NonCopyable, ReferenceCounted<SimKmsConnectorCon
};
namespace {
Optional<int64_t> getRefreshInterval(int64_t now, int64_t defaultTtl) {
Optional<int64_t> getRefreshInterval(const int64_t now, const int64_t defaultTtl) {
if (BUGGIFY) {
return Optional<int64_t>(now + defaultTtl);
return Optional<int64_t>(now);
}
return Optional<int64_t>();
return Optional<int64_t>(now + defaultTtl);
}
Optional<int64_t> getExpireInterval(Optional<int64_t> refTS) {
Optional<int64_t> getExpireInterval(Optional<int64_t> refTS, const int64_t defaultTtl) {
ASSERT(refTS.present());
if (BUGGIFY) {
return Optional<int64_t>(-1);
}
return refTS;
return (refTS.get() + defaultTtl);
}
} // namespace
@ -105,11 +107,17 @@ ACTOR Future<Void> ekLookupByIds(Reference<SimKmsConnectorContext> ctx,
}
// Lookup corresponding EncryptKeyCtx for input keyId
const int64_t currTS = (int64_t)now();
// Fetch the default TTL once so BUGGIFY can't return a different value per invocation, which could cause refTS > expTS
const int64_t defaultTtl = FLOW_KNOBS->ENCRYPT_CIPHER_KEY_CACHE_TTL;
Optional<int64_t> refAtTS = getRefreshInterval(currTS, defaultTtl);
Optional<int64_t> expAtTS = getExpireInterval(refAtTS, defaultTtl);
TraceEvent("SimKms.EKLookupById").detail("RefreshAt", refAtTS).detail("ExpireAt", expAtTS);
for (const auto& item : req.encryptKeyInfos) {
const auto& itr = ctx->simEncryptKeyStore.find(item.baseCipherId);
if (itr != ctx->simEncryptKeyStore.end()) {
rep.cipherKeyDetails.emplace_back_deep(
rep.arena, item.domainId, itr->first, StringRef(itr->second.get()->key));
rep.arena, item.domainId, itr->first, StringRef(itr->second.get()->key), refAtTS, expAtTS);
if (dbgKIdTrace.present()) {
// {encryptDomainId, baseCipherId} forms a unique tuple across encryption domains
@ -145,11 +153,12 @@ ACTOR Future<Void> ekLookupByDomainIds(Reference<SimKmsConnectorContext> ctx,
// Map encryptionDomainId to corresponding EncryptKeyCtx element using a modulo operation. This
// would mean multiple domains get mapped to the same encryption key, which is fine; the
// EncryptKeyStore guarantees that keyId -> plaintext encryptKey mapping is idempotent.
int64_t currTS = (int64_t)now();
const int64_t currTS = (int64_t)now();
// Fetch the default TTL once so BUGGIFY can't return a different value per invocation, which could cause refTS > expTS
int64_t defaultTtl = FLOW_KNOBS->ENCRYPT_CIPHER_KEY_CACHE_TTL;
const int64_t defaultTtl = FLOW_KNOBS->ENCRYPT_CIPHER_KEY_CACHE_TTL;
Optional<int64_t> refAtTS = getRefreshInterval(currTS, defaultTtl);
Optional<int64_t> expAtTS = getExpireInterval(refAtTS);
Optional<int64_t> expAtTS = getExpireInterval(refAtTS, defaultTtl);
TraceEvent("SimKms.EKLookupByDomainId").detail("RefreshAt", refAtTS).detail("ExpireAt", expAtTS);
for (const auto& info : req.encryptDomainInfos) {
EncryptCipherBaseKeyId keyId = 1 + abs(info.domainId) % SERVER_KNOBS->SIM_KMS_MAX_KEYS;
const auto& itr = ctx->simEncryptKeyStore.find(keyId);
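
Note that the helpers' behavior flips relative to the old code: the common case now returns real TTL-relative timestamps (refresh at now + ttl, expire one ttl after the refresh point), while BUGGIFY selects the degenerate cases (refresh immediately; expireAt = -1). A sketch of the arithmetic, assuming -1 keeps its sim-KMS sentinel meaning:

#include <cstdint>
#include <optional>

// Sketch of the new timestamp math; 'buggify' models the BUGGIFY branch.
std::optional<int64_t> refreshAtSketch(int64_t now, int64_t ttl, bool buggify) {
    return buggify ? now : now + ttl; // refresh immediately under BUGGIFY
}

std::optional<int64_t> expireAtSketch(std::optional<int64_t> refresh, int64_t ttl, bool buggify) {
    // -1 is the sentinel the sim KMS reports in the BUGGIFY fault mode
    return buggify ? std::optional<int64_t>(-1) : std::optional<int64_t>(refresh.value() + ttl);
}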

View File

@ -1078,76 +1078,61 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
return processMap;
}
struct ClientStats {
int count;
std::set<std::pair<NetworkAddress, Key>> examples;
ClientStats() : count(0) {}
};
static JsonBuilderObject clientStatusFetcher(
std::map<NetworkAddress, std::pair<double, OpenDatabaseRequest>>* clientStatusMap) {
JsonBuilderObject clientStatus;
int64_t clientCount = 0;
std::map<Key, ClientStats> issues;
std::map<Standalone<ClientVersionRef>, ClientStats> supportedVersions;
std::map<Key, ClientStats> maxSupportedProtocol;
// Here we handle versions and maxSupportedProtocols; the issues are handled in getClientIssuesAsMessages
std::map<Standalone<ClientVersionRef>, OpenDatabaseRequest::Samples> supportedVersions;
std::map<Key, OpenDatabaseRequest::Samples> maxSupportedProtocol;
for (auto iter = clientStatusMap->begin(); iter != clientStatusMap->end();) {
if (now() - iter->second.first < 2 * SERVER_KNOBS->COORDINATOR_REGISTER_INTERVAL) {
clientCount += iter->second.second.clientCount;
for (auto& it : iter->second.second.issues) {
auto& issue = issues[it.item];
issue.count += it.count;
issue.examples.insert(it.examples.begin(), it.examples.end());
}
for (auto& it : iter->second.second.supportedVersions) {
auto& version = supportedVersions[it.item];
version.count += it.count;
version.examples.insert(it.examples.begin(), it.examples.end());
}
for (auto& it : iter->second.second.maxProtocolSupported) {
auto& protocolVersion = maxSupportedProtocol[it.item];
protocolVersion.count += it.count;
protocolVersion.examples.insert(it.examples.begin(), it.examples.end());
}
++iter;
} else {
if (now() - iter->second.first >= 2 * SERVER_KNOBS->COORDINATOR_REGISTER_INTERVAL) {
iter = clientStatusMap->erase(iter);
continue;
}
clientCount += iter->second.second.clientCount;
for (const auto& [version, samples] : iter->second.second.supportedVersions) {
supportedVersions[version] += samples;
}
for (const auto& [protocol, samples] : iter->second.second.maxProtocolSupported) {
maxSupportedProtocol[protocol] += samples;
}
++iter;
}
clientStatus["count"] = clientCount;
JsonBuilderArray versionsArray = JsonBuilderArray();
for (auto& cv : supportedVersions) {
for (const auto& [clientVersionRef, samples] : supportedVersions) {
JsonBuilderObject ver;
ver["count"] = (int64_t)cv.second.count;
ver["client_version"] = cv.first.clientVersion.toString();
ver["protocol_version"] = cv.first.protocolVersion.toString();
ver["source_version"] = cv.first.sourceVersion.toString();
ver["count"] = (int64_t)samples.count;
ver["client_version"] = clientVersionRef.clientVersion.toString();
ver["protocol_version"] = clientVersionRef.protocolVersion.toString();
ver["source_version"] = clientVersionRef.sourceVersion.toString();
JsonBuilderArray clients = JsonBuilderArray();
for (auto& client : cv.second.examples) {
for (const auto& [networkAddress, trackLogGroup] : samples.samples) {
JsonBuilderObject cli;
cli["address"] = client.first.toString();
cli["log_group"] = client.second.toString();
cli["address"] = networkAddress.toString();
cli["log_group"] = trackLogGroup.toString();
clients.push_back(cli);
}
auto iter = maxSupportedProtocol.find(cv.first.protocolVersion);
if (iter != maxSupportedProtocol.end()) {
auto iter = maxSupportedProtocol.find(clientVersionRef.protocolVersion);
if (iter != std::end(maxSupportedProtocol)) {
JsonBuilderArray maxClients = JsonBuilderArray();
for (auto& client : iter->second.examples) {
for (const auto& [networkAddress, trackLogGroup] : iter->second.samples) {
JsonBuilderObject cli;
cli["address"] = client.first.toString();
cli["log_group"] = client.second.toString();
cli["address"] = networkAddress.toString();
cli["log_group"] = trackLogGroup.toString();
maxClients.push_back(cli);
}
ver["max_protocol_count"] = iter->second.count;
ver["max_protocol_clients"] = maxClients;
maxSupportedProtocol.erase(cv.first.protocolVersion);
maxSupportedProtocol.erase(clientVersionRef.protocolVersion);
}
ver["connected_clients"] = clients;
@ -2660,18 +2645,19 @@ static JsonBuilderArray getClientIssuesAsMessages(
std::map<std::string, std::pair<int, std::vector<std::string>>> deduplicatedIssues;
for (auto iter = clientStatusMap->begin(); iter != clientStatusMap->end();) {
if (now() - iter->second.first < 2 * SERVER_KNOBS->COORDINATOR_REGISTER_INTERVAL) {
for (auto& issue : iter->second.second.issues) {
auto& t = deduplicatedIssues[issue.item.toString()];
t.first += issue.count;
for (auto& example : issue.examples) {
t.second.push_back(formatIpPort(example.first.ip, example.first.port));
}
}
++iter;
} else {
if (now() - iter->second.first >= 2 * SERVER_KNOBS->COORDINATOR_REGISTER_INTERVAL) {
iter = clientStatusMap->erase(iter);
continue;
}
for (const auto& [issueKey, samples] : iter->second.second.issues) {
auto& t = deduplicatedIssues[issueKey.toString()];
t.first += samples.count;
for (const auto& sample : samples.samples) {
t.second.push_back(formatIpPort(sample.first.ip, sample.first.port));
}
}
++iter;
}
// FIXME: add the log_group in addition to the network address

View File

@ -61,6 +61,8 @@ public:
}
virtual Future<Void> waitForDataDistributionEnabled(const DDEnabledState* ddEnabledState) const = 0;
virtual Future<Void> pollMoveKeysLock(MoveKeysLock lock, const DDEnabledState* ddEnabledState) const = 0;
};
class DDTxnProcessorImpl;
@ -94,6 +96,8 @@ public:
const DatabaseConfiguration& configuration) const override;
Future<Void> waitForDataDistributionEnabled(const DDEnabledState* ddEnabledState) const override;
Future<Void> pollMoveKeysLock(MoveKeysLock lock, const DDEnabledState* ddEnabledState) const override;
};
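
The two new virtuals keep data-distribution startup logic working purely against the transaction-processor interface, so the mock implementation can stand in for tests. A hedged flow-style sketch of how a caller might compose them (hypothetical helper; assumes the actor compiler and that these virtuals live on the IDDTxnProcessor-style base shown above):

// Hypothetical composition: block until data distribution is enabled,
// then keep checking ownership of the move-keys lock.
ACTOR Future<Void> waitThenPollLock(IDDTxnProcessor* proc, MoveKeysLock lock, const DDEnabledState* dd) {
    wait(proc->waitForDataDistributionEnabled(dd));
    wait(proc->pollMoveKeysLock(lock, dd));
    return Void();
}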
// A mock transaction implementation for test usage.

View File

@ -181,15 +181,30 @@ ACTOR Future<std::vector<std::pair<uint64_t, double>>> trackInsertionCount(Datab
ACTOR template <class T>
Future<Void> waitForLowInFlight(Database cx, T* workload) {
state Future<Void> timeout = delay(600.0);
loop {
int64_t inFlight = wait(getDataInFlight(cx, workload->dbInfo));
TraceEvent("DynamicWarming").detail("InFlight", inFlight);
if (inFlight > 1e6) { // Wait for just 1 MB to be in flight
wait(delay(1.0));
} else {
wait(delay(1.0));
TraceEvent("DynamicWarmingDone").log();
break;
try {
if (timeout.isReady()) {
throw timed_out();
}
int64_t inFlight = wait(getDataInFlight(cx, workload->dbInfo));
TraceEvent("DynamicWarming").detail("InFlight", inFlight);
if (inFlight > 1e6) { // Wait for just 1 MB to be in flight
wait(delay(1.0));
} else {
wait(delay(1.0));
TraceEvent("DynamicWarmingDone").log();
break;
}
} catch (Error& e) {
if (e.code() == error_code_attribute_not_found) {
// DD may not be initialized yet and attribute "DataInFlight" can be missing
wait(delay(1.0));
} else {
TraceEvent(SevWarn, "WaitForLowInFlightError").error(e);
throw;
}
}
}
return Void();
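
The new shape is: an explicit 600-second deadline checked at the top of each iteration, one retryable error (attribute_not_found while DD is still initializing), and a rethrow for everything else. A plain-C++ sketch of the same pattern, with a hypothetical fetch callback standing in for getDataInFlight:

#include <chrono>
#include <cstdint>
#include <functional>
#include <stdexcept>
#include <string>
#include <thread>

// Deadline + single-tolerated-error retry loop, mirroring the actor above.
void waitForLowInFlightSketch(const std::function<int64_t()>& fetchInFlightBytes) {
    const auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(600);
    for (;;) {
        if (std::chrono::steady_clock::now() >= deadline)
            throw std::runtime_error("timed_out");
        try {
            if (fetchInFlightBytes() <= 1'000'000) // wait for just ~1 MB in flight
                return;
        } catch (const std::runtime_error& e) {
            if (std::string(e.what()) != "attribute_not_found")
                throw; // only the "DD not initialized yet" case is retryable
        }
        std::this_thread::sleep_for(std::chrono::seconds(1)); // poll interval
    }
}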

View File

@ -6495,8 +6495,10 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
}
// FIXME: remove when we no longer support upgrades from 5.X
data->cx->enableLocalityLoadBalance = EnableLocalityLoadBalance::True;
TraceEvent(SevWarnAlways, "FKReenableLB").detail("FKID", fetchKeysID);
if (!data->cx->enableLocalityLoadBalance) {
data->cx->enableLocalityLoadBalance = EnableLocalityLoadBalance::True;
TraceEvent(SevWarnAlways, "FKReenableLB").detail("FKID", fetchKeysID);
}
// We have completed the fetch and write of the data; now we wait for the MVCC window to pass.
// As we have finished this work, we will allow more work to start...

View File

@ -230,27 +230,6 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
}
}
ACTOR Future<Void> setUpBlobRange(Database cx, KeyRange keyRange) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr->set(blobRangeChangeKey, deterministicRandom()->randomUniqueID().toString());
wait(krmSetRange(tr, blobRangeKeys.begin, keyRange, LiteralStringRef("1")));
wait(tr->commit());
if (BGW_DEBUG) {
fmt::print("Successfully set up blob granule range for tenant range [{0} - {1})\n",
keyRange.begin.printable(),
keyRange.end.printable());
}
return Void();
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<TenantMapEntry> setUpTenant(Database cx, TenantName name) {
if (BGW_DEBUG) {
fmt::print("Setting up blob granule range for tenant {0}\n", name.printable());
@ -291,7 +270,8 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
self->directories[directoryIdx]->directoryRange =
KeyRangeRef(tenantEntry.prefix, tenantEntry.prefix.withSuffix(normalKeys.end));
tenants.push_back({ self->directories[directoryIdx]->tenantName, tenantEntry });
wait(self->setUpBlobRange(cx, self->directories[directoryIdx]->directoryRange));
bool _success = wait(cx->blobbifyRange(self->directories[directoryIdx]->directoryRange));
ASSERT(_success);
}
tenantData.addTenants(tenants);
@ -911,8 +891,8 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
loop {
state Transaction tr(cx, threadData->tenantName);
try {
Standalone<VectorRef<KeyRangeRef>> ranges = wait(tr.getBlobGranuleRanges(normalKeys));
ASSERT(ranges.size() >= 1);
Standalone<VectorRef<KeyRangeRef>> ranges = wait(tr.getBlobGranuleRanges(normalKeys, 1000000));
ASSERT(ranges.size() >= 1 && ranges.size() < 1000000);
ASSERT(ranges.front().begin == normalKeys.begin);
ASSERT(ranges.back().end == normalKeys.end);
for (int i = 0; i < ranges.size() - 1; i++) {
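
Setup now goes through the database-level blob management API instead of hand-writing the system blob range keys: blobbifyRange returns a Future<bool> indicating whether the range was registered, and the C binding fdb_database_blobbify_range added in this commit surfaces the same call. A minimal flow-style sketch of the new setup path, assuming a Database cx and the actor compiler:

// Mirrors the blobbifyRange call above; the manual krmSetRange writes
// to the blob range keyspace are no longer needed.
ACTOR Future<Void> setUpBlobRangeSketch(Database cx, KeyRange keyRange) {
    bool success = wait(cx->blobbifyRange(keyRange));
    ASSERT(success); // the workload treats a failed registration as fatal
    return Void();
}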

View File

@ -105,6 +105,10 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
clearAndMergeCheck = getOption(options, LiteralStringRef("clearAndMergeCheck"), sharedRandomNumber % 10 == 0);
sharedRandomNumber /= 10;
// don't do strictPurgeChecking or forcePurge if !enablePurging
strictPurgeChecking &= enablePurging;
doForcePurge &= enablePurging;
startedForcePurge = false;
if (doSetup && BGV_DEBUG) {
@ -112,6 +116,8 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
fmt::print(" enablePurging={0}\n", enablePurging);
fmt::print(" strictPurgeChecking={0}\n", strictPurgeChecking);
fmt::print(" doForcePurge={0}\n", doForcePurge);
fmt::print(" initAtEnd={0}\n", initAtEnd);
fmt::print(" clearAndMergeCheck={0}\n", clearAndMergeCheck);
}
ASSERT(threads >= 1);
@ -169,7 +175,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
state Transaction tr(cx);
loop {
try {
Standalone<VectorRef<KeyRangeRef>> allGranules = wait(tr.getBlobGranuleRanges(normalKeys));
Standalone<VectorRef<KeyRangeRef>> allGranules = wait(tr.getBlobGranuleRanges(normalKeys, 1000000));
self->granuleRanges.set(allGranules);
break;
} catch (Error& e) {

View File

@ -21,14 +21,17 @@
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/NativeAPI.actor.h"
#include "flow/EncryptUtils.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/BlobCipher.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/flow.h"
#include "flow/ITrace.h"
#include "flow/Trace.h"
#include <chrono>
#include <cstring>
#include <limits>
#include <memory>
#include <random>
@ -111,6 +114,7 @@ struct EncryptionOpsWorkload : TestWorkload {
int pageSize;
int maxBufSize;
std::unique_ptr<uint8_t[]> buff;
int enableTTLTest;
Arena arena;
std::unique_ptr<WorkloadMetrics> metrics;
@ -121,7 +125,7 @@ struct EncryptionOpsWorkload : TestWorkload {
EncryptCipherBaseKeyId headerBaseCipherId;
EncryptCipherRandomSalt headerRandomSalt;
EncryptionOpsWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
EncryptionOpsWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), enableTTLTest(false) {
mode = getOption(options, LiteralStringRef("fixedSize"), 1);
numIterations = getOption(options, LiteralStringRef("numIterations"), 10);
pageSize = getOption(options, LiteralStringRef("pageSize"), 4096);
@ -136,13 +140,18 @@ struct EncryptionOpsWorkload : TestWorkload {
metrics = std::make_unique<WorkloadMetrics>();
if (wcx.clientId == 0 && mode == 1) {
enableTTLTest = true;
}
TraceEvent("EncryptionOpsWorkload")
.detail("Mode", getModeStr())
.detail("MinDomainId", minDomainId)
.detail("MaxDomainId", maxDomainId);
.detail("MaxDomainId", maxDomainId)
.detail("EnableTTL", enableTTLTest);
}
~EncryptionOpsWorkload() { TraceEvent("EncryptionOpsWorkload_Done").log(); }
~EncryptionOpsWorkload() { TraceEvent("EncryptionOpsWorkload.Done").log(); }
bool isFixedSizePayload() { return mode == 1; }
@ -165,14 +174,19 @@ struct EncryptionOpsWorkload : TestWorkload {
void setupCipherEssentials() {
Reference<BlobCipherKeyCache> cipherKeyCache = BlobCipherKeyCache::getInstance();
TraceEvent("SetupCipherEssentials_Start").detail("MinDomainId", minDomainId).detail("MaxDomainId", maxDomainId);
TraceEvent("SetupCipherEssentials.Start").detail("MinDomainId", minDomainId).detail("MaxDomainId", maxDomainId);
uint8_t buff[AES_256_KEY_LENGTH];
std::vector<Reference<BlobCipherKey>> cipherKeys;
int cipherLen = 0;
for (EncryptCipherDomainId id = minDomainId; id <= maxDomainId; id++) {
generateRandomBaseCipher(AES_256_KEY_LENGTH, &buff[0], &cipherLen);
cipherKeyCache->insertCipherKey(id, minBaseCipherId, buff, cipherLen);
cipherKeyCache->insertCipherKey(id,
minBaseCipherId,
buff,
cipherLen,
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
ASSERT(cipherLen > 0 && cipherLen <= AES_256_KEY_LENGTH);
@ -183,13 +197,18 @@ struct EncryptionOpsWorkload : TestWorkload {
// insert the Encrypt Header cipherKey; record cipherDetails as getLatestCipher() may not work with multiple
// test clients
generateRandomBaseCipher(AES_256_KEY_LENGTH, &buff[0], &cipherLen);
cipherKeyCache->insertCipherKey(ENCRYPT_HEADER_DOMAIN_ID, headerBaseCipherId, buff, cipherLen);
cipherKeyCache->insertCipherKey(ENCRYPT_HEADER_DOMAIN_ID,
headerBaseCipherId,
buff,
cipherLen,
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
Reference<BlobCipherKey> latestCipher = cipherKeyCache->getLatestCipherKey(ENCRYPT_HEADER_DOMAIN_ID);
ASSERT_EQ(latestCipher->getBaseCipherId(), headerBaseCipherId);
ASSERT_EQ(memcmp(latestCipher->rawBaseCipher(), buff, cipherLen), 0);
headerRandomSalt = latestCipher->getSalt();
TraceEvent("SetupCipherEssentials_Done")
TraceEvent("SetupCipherEssentials.Done")
.detail("MinDomainId", minDomainId)
.detail("MaxDomainId", maxDomainId)
.detail("HeaderBaseCipherId", headerBaseCipherId)
@ -198,9 +217,14 @@ struct EncryptionOpsWorkload : TestWorkload {
void resetCipherEssentials() {
Reference<BlobCipherKeyCache> cipherKeyCache = BlobCipherKeyCache::getInstance();
cipherKeyCache->cleanup();
for (EncryptCipherDomainId id = minDomainId; id <= maxDomainId; id++) {
cipherKeyCache->resetEncryptDomainId(id);
}
TraceEvent("ResetCipherEssentials_Done").log();
cipherKeyCache->resetEncryptDomainId(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID);
cipherKeyCache->resetEncryptDomainId(ENCRYPT_HEADER_DOMAIN_ID);
TraceEvent("ResetCipherEssentials.Done").log();
}
void updateLatestBaseCipher(const EncryptCipherDomainId encryptDomainId,
@ -232,7 +256,9 @@ struct EncryptionOpsWorkload : TestWorkload {
baseCipherId,
cipherKey->rawBaseCipher(),
cipherKey->getBaseCipherLen(),
cipherKey->getSalt());
cipherKey->getSalt(),
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
// Ensure the update was a NOP
Reference<BlobCipherKey> cKey = cipherKeyCache->getCipherKey(domainId, baseCipherId, salt);
ASSERT(cKey->isEqual(cipherKey));
@ -297,11 +323,7 @@ struct EncryptionOpsWorkload : TestWorkload {
metrics->updateDecryptionTime(std::chrono::duration<double, std::nano>(end - start).count());
}
Future<Void> setup(Database const& ctx) override { return Void(); }
std::string description() const override { return "EncryptionOps"; }
Future<Void> start(Database const& cx) override {
void testBlobCipherKeyCacheOps() {
uint8_t baseCipher[AES_256_KEY_LENGTH];
int baseCipherLen = 0;
EncryptCipherBaseKeyId nextBaseCipherId;
@ -322,7 +344,12 @@ struct EncryptionOpsWorkload : TestWorkload {
if (updateBaseCipher) {
// simulate baseCipherId getting refreshed/updated
updateLatestBaseCipher(encryptDomainId, &baseCipher[0], &baseCipherLen, &nextBaseCipherId);
cipherKeyCache->insertCipherKey(encryptDomainId, nextBaseCipherId, &baseCipher[0], baseCipherLen);
cipherKeyCache->insertCipherKey(encryptDomainId,
nextBaseCipherId,
&baseCipher[0],
baseCipherLen,
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
}
auto start = std::chrono::high_resolution_clock::now();
@ -368,6 +395,103 @@ struct EncryptionOpsWorkload : TestWorkload {
// Cleanup cipherKeys
resetCipherEssentials();
}
static void compareCipherDetails(Reference<BlobCipherKey> cipherKey,
const EncryptCipherDomainId domId,
const EncryptCipherBaseKeyId baseCipherId,
const uint8_t* baseCipher,
const int baseCipherLen,
const int64_t refreshAt,
const int64_t expAt) {
ASSERT(cipherKey.isValid());
ASSERT_EQ(cipherKey->getDomainId(), domId);
ASSERT_EQ(cipherKey->getBaseCipherId(), baseCipherId);
ASSERT_EQ(memcmp(cipherKey->rawBaseCipher(), baseCipher, baseCipherLen), 0);
ASSERT_EQ(cipherKey->getRefreshAtTS(), refreshAt);
ASSERT_EQ(cipherKey->getExpireAtTS(), expAt);
}
ACTOR Future<Void> testBlobCipherKeyCacheTTL(EncryptionOpsWorkload* self) {
state Reference<BlobCipherKeyCache> cipherKeyCache = BlobCipherKeyCache::getInstance();
state EncryptCipherDomainId domId = deterministicRandom()->randomInt(120000, 150000);
state EncryptCipherBaseKeyId baseCipherId = deterministicRandom()->randomInt(786, 1024);
state std::unique_ptr<uint8_t[]> baseCipher = std::make_unique<uint8_t[]>(AES_256_KEY_LENGTH);
state Reference<BlobCipherKey> cipherKey;
state EncryptCipherRandomSalt salt;
state int64_t refreshAt;
state int64_t expAt;
TraceEvent("TestBlobCipherCacheTTL.Start").detail("DomId", domId);
deterministicRandom()->randomBytes(baseCipher.get(), AES_256_KEY_LENGTH);
// Validate 'non-revocable' cipher with no expiration
refreshAt = std::numeric_limits<int64_t>::max();
expAt = std::numeric_limits<int64_t>::max();
cipherKeyCache->insertCipherKey(domId, baseCipherId, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
cipherKey = cipherKeyCache->getLatestCipherKey(domId);
compareCipherDetails(cipherKey, domId, baseCipherId, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
TraceEvent("TestBlobCipherCacheTTL.NonRevocableNoExpiry").detail("DomId", domId);
// Validate 'non-revocable' cipher with a refresh timestamp (expireAt stays infinite)
state EncryptCipherBaseKeyId baseCipherId_1 = baseCipherId + 1;
refreshAt = now() + 5;
cipherKeyCache->insertCipherKey(domId, baseCipherId_1, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
cipherKey = cipherKeyCache->getLatestCipherKey(domId);
ASSERT(cipherKey.isValid());
compareCipherDetails(cipherKey, domId, baseCipherId_1, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
salt = cipherKey->getSalt();
wait(delayUntil(refreshAt));
// Ensure that the latest cipherKey now needs a refresh; direct cipher lookup still works (non-revocable)
cipherKey = cipherKeyCache->getLatestCipherKey(domId);
ASSERT(!cipherKey.isValid());
cipherKey = cipherKeyCache->getCipherKey(domId, baseCipherId_1, salt);
ASSERT(cipherKey.isValid());
compareCipherDetails(cipherKey, domId, baseCipherId_1, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
TraceEvent("TestBlobCipherCacheTTL.NonRevocableWithExpiry").detail("DomId", domId);
// Validate 'revocable' cipher with expiration
state EncryptCipherBaseKeyId baseCipherId_2 = baseCipherId + 2;
refreshAt = now() + 5;
expAt = refreshAt + 5;
cipherKeyCache->insertCipherKey(domId, baseCipherId_2, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
cipherKey = cipherKeyCache->getLatestCipherKey(domId);
ASSERT(cipherKey.isValid());
compareCipherDetails(cipherKey, domId, baseCipherId_2, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
salt = cipherKey->getSalt();
wait(delayUntil(refreshAt));
// Ensure that the latest cipherKey now needs a refresh; direct cipher lookup still works (not yet expired)
cipherKey = cipherKeyCache->getLatestCipherKey(domId);
ASSERT(!cipherKey.isValid());
cipherKey = cipherKeyCache->getCipherKey(domId, baseCipherId_2, salt);
ASSERT(cipherKey.isValid());
compareCipherDetails(cipherKey, domId, baseCipherId_2, baseCipher.get(), AES_256_KEY_LENGTH, refreshAt, expAt);
wait(delayUntil(expAt));
// Ensure that cipherKey lookup doesn't work after expiry
cipherKey = cipherKeyCache->getLatestCipherKey(domId);
ASSERT(!cipherKey.isValid());
cipherKey = cipherKeyCache->getCipherKey(domId, baseCipherId_2, salt);
ASSERT(!cipherKey.isValid());
TraceEvent("TestBlobCipherCacheTTL.End").detail("DomId", domId);
return Void();
}
Future<Void> setup(Database const& ctx) override { return Void(); }
std::string description() const override { return "EncryptionOps"; }
Future<Void> start(Database const& cx) override { return _start(cx, this); }
ACTOR Future<Void> _start(Database cx, EncryptionOpsWorkload* self) {
self->testBlobCipherKeyCacheOps();
if (self->enableTTLTest) {
wait(self->testBlobCipherKeyCacheTTL(self));
}
return Void();
}
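
The test above walks one key through three phases. A toy model of the two checks the cache applies, assuming (as the test implies) that needsRefresh only gates getLatestCipherKey while isExpired also gates getCipherKey; the exact boundary comparisons (>= vs >) are an assumption here:

#include <cassert>
#include <cstdint>

// Toy model of the cache's TTL checks and the phases the test exercises.
struct KeyTtl {
    int64_t refreshAt, expireAt;
    bool needsRefresh(int64_t now) const { return now >= refreshAt; }
    bool isExpired(int64_t now) const { return now >= expireAt; }
};

int main() {
    KeyTtl k{ 10, 15 };
    assert(!k.needsRefresh(5) && !k.isExpired(5));  // fresh: latest and (id, salt) lookups both succeed
    assert(k.needsRefresh(12) && !k.isExpired(12)); // stale: only the (id, salt) lookup succeeds
    assert(k.needsRefresh(20) && k.isExpired(20));  // expired: both lookups return an invalid reference
}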

View File

@ -63,7 +63,7 @@ struct ExceptionContract {
e.code() == error_code_future_version || e.code() == error_code_transaction_cancelled ||
e.code() == error_code_key_too_large || e.code() == error_code_value_too_large ||
e.code() == error_code_process_behind || e.code() == error_code_batch_transaction_throttled ||
e.code() == error_code_tag_throttled) {
e.code() == error_code_tag_throttled || e.code() == error_code_grv_proxy_memory_limit_exceeded) {
return;
}

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/FDBTypes.h"
#include "fdbserver/workloads/workloads.actor.h"
#include <fdbserver/Knobs.h>
#include <flow/actorcompiler.h>
@ -82,7 +83,16 @@ struct LocalRatekeeperWorkload : TestWorkload {
.detail("Actual", metrics.localRateLimit);
}
tr.reset();
Version readVersion = wait(tr.getReadVersion());
state Version readVersion = invalidVersion;
loop {
try {
Version v = wait(tr.getReadVersion());
readVersion = v;
break;
} catch (Error& e) {
wait(tr.onError(e));
}
}
requests.clear();
// we send 100 requests to this storage node and count how many of those get rejected
for (int i = 0; i < 100; ++i) {

View File

@ -873,31 +873,34 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
}
TraceEvent(SevDebug, "DatabaseLocked").log();
// if database locked, fdb read should get database_locked error
try {
tx->reset();
tx->setOption(FDBTransactionOptions::RAW_ACCESS);
RangeResult res = wait(tx->getRange(normalKeys, 1));
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled)
throw;
ASSERT(e.code() == error_code_database_locked);
tx->reset();
loop {
try {
tx->setOption(FDBTransactionOptions::RAW_ACCESS);
RangeResult res = wait(tx->getRange(normalKeys, 1));
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled)
throw;
if (e.code() == error_code_grv_proxy_memory_limit_exceeded ||
e.code() == error_code_batch_transaction_throttled) {
wait(tx->onError(e));
} else {
ASSERT(e.code() == error_code_database_locked);
break;
}
}
}
// make sure we unlock the database
// unlock is idempotent, so we can commit repeatedly until it succeeds
tx->reset();
loop {
try {
tx->reset();
tx->setOption(FDBTransactionOptions::RAW_ACCESS);
tx->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
// unlock the database
tx->clear(SpecialKeySpace::getManagementApiCommandPrefix("lock"));
wait(tx->commit());
TraceEvent(SevDebug, "DatabaseUnlocked").log();
tx->reset();
// read should be successful
tx->setOption(FDBTransactionOptions::RAW_ACCESS);
RangeResult res = wait(tx->getRange(normalKeys, 1));
tx->reset();
break;
} catch (Error& e) {
TraceEvent(SevDebug, "DatabaseUnlockFailure").error(e);
@ -905,9 +908,23 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
wait(tx->onError(e));
}
}
tx->reset();
loop {
try {
// read should be successful
tx->setOption(FDBTransactionOptions::RAW_ACCESS);
RangeResult res = wait(tx->getRange(normalKeys, 1));
break;
} catch (Error& e) {
wait(tx->onError(e));
}
}
// test consistencycheck which only used by ConsistencyCheck Workload
// Note: we have exclusive ownership of fdbShouldConsistencyCheckBeSuspended;
// no existing workloads can modify the key
tx->reset();
{
try {
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);

View File

@ -606,6 +606,23 @@ struct TenantManagementWorkload : TestWorkload {
return Void();
}
// Returns GRV and eats GRV errors
ACTOR static Future<Version> getReadVersion(Reference<ReadYourWritesTransaction> tr) {
loop {
try {
Version version = wait(tr->getReadVersion());
return version;
} catch (Error& e) {
if (e.code() == error_code_grv_proxy_memory_limit_exceeded ||
e.code() == error_code_batch_transaction_throttled) {
wait(tr->onError(e));
} else {
throw;
}
}
}
}
ACTOR static Future<Void> deleteTenant(TenantManagementWorkload* self) {
state TenantName beginTenant = self->chooseTenantName(true);
state OperationType operationType = self->randomOperationType();
@ -695,7 +712,7 @@ struct TenantManagementWorkload : TestWorkload {
state bool retried = false;
loop {
try {
state Version beforeVersion = wait(tr->getReadVersion());
state Version beforeVersion = wait(self->getReadVersion(tr));
Optional<Void> result =
wait(timeout(deleteImpl(tr, beginTenant, endTenant, tenants, operationType, self),
deterministicRandom()->randomInt(1, 30)));
@ -704,7 +721,7 @@ struct TenantManagementWorkload : TestWorkload {
if (anyExists) {
if (self->oldestDeletionVersion == 0 && !tenants.empty()) {
tr->reset();
Version afterVersion = wait(tr->getReadVersion());
Version afterVersion = wait(self->getReadVersion(tr));
self->oldestDeletionVersion = afterVersion;
}
self->newestDeletionVersion = beforeVersion;
@ -727,6 +744,11 @@ struct TenantManagementWorkload : TestWorkload {
operationType == OperationType::MANAGEMENT_DATABASE);
ASSERT(retried);
break;
} else if (e.code() == error_code_grv_proxy_memory_limit_exceeded ||
e.code() == error_code_batch_transaction_throttled) {
// the GRV proxy can transiently reject requests (memory limit / batch throttling); retry via onError
wait(tr->onError(e));
continue;
} else {
throw;
}

View File

@ -125,6 +125,10 @@ struct UnitTestWorkload : TestWorkload {
}
}
std::sort(tests.begin(), tests.end(), [](auto lhs, auto rhs) {
return std::string_view(lhs->name) < std::string_view(rhs->name);
});
fprintf(stdout, "Found %zu tests\n", tests.size());
if (tests.size() == 0) {

View File

@ -20,21 +20,34 @@
#include "flow/BlobCipher.h"
#include "flow/Arena.h"
#include "flow/EncryptUtils.h"
#include "flow/Knobs.h"
#include "flow/Error.h"
#include "flow/FastRef.h"
#include "flow/IRandom.h"
#include "flow/ITrace.h"
#include "flow/flow.h"
#include "flow/network.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include <chrono>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <thread>
#include <utility>
#ifndef _WIN32
#include <unistd.h>
#else
#include <io.h>
#endif
#define BLOB_CIPHER_DEBUG false
namespace {
bool isEncryptHeaderAuthTokenModeValid(const EncryptAuthTokenMode mode) {
return mode >= ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE && mode < ENCRYPT_HEADER_AUTH_TOKEN_LAST;
@ -46,29 +59,43 @@ bool isEncryptHeaderAuthTokenModeValid(const EncryptAuthTokenMode mode) {
BlobCipherKey::BlobCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCiphId,
const uint8_t* baseCiph,
int baseCiphLen) {
int baseCiphLen,
const int64_t refreshAt,
const int64_t expireAt) {
// The generated salt is used when applying HMAC key derivation; hence, not using a crypto-secure hash algorithm is ok.
// Further, 'deterministic' salt generation is used to preserve simulation determinism properties.
EncryptCipherRandomSalt salt;
if (g_network->isSimulated()) {
salt = deterministicRandom()->randomUInt64();
} else {
salt = nondeterministicRandom()->randomUInt64();
}
initKey(domainId, baseCiph, baseCiphLen, baseCiphId, salt);
// Support two types of CipherKeys: 'revocable' and 'non-revocable' ciphers.
// In all cases, either the cipherKey never expires (i.e. refreshAt == infinite), or refreshAt must be <= the
// expireAt timestamp.
ASSERT(refreshAt == std::numeric_limits<int64_t>::max() || (refreshAt <= expireAt));
initKey(domainId, baseCiph, baseCiphLen, baseCiphId, salt, refreshAt, expireAt);
}
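
The assert admits exactly three key classes. A compact sketch enumerating the valid (refreshAt, expireAt) combinations, with kForever standing in for std::numeric_limits<int64_t>::max():

#include <cassert>
#include <cstdint>
#include <limits>

constexpr int64_t kForever = std::numeric_limits<int64_t>::max();

// Same invariant as the constructor above.
void checkCipherTtlInvariant(int64_t refreshAt, int64_t expireAt) {
    assert(refreshAt == kForever || refreshAt <= expireAt);
}

int main() {
    checkCipherTtlInvariant(kForever, kForever); // non-revocable, never refreshed
    checkCipherTtlInvariant(100, kForever);      // non-revocable, but refreshed at t=100
    checkCipherTtlInvariant(100, 200);           // revocable: refresh at 100, revoked at 200
}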
BlobCipherKey::BlobCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCiphId,
const uint8_t* baseCiph,
int baseCiphLen,
const EncryptCipherRandomSalt& salt) {
initKey(domainId, baseCiph, baseCiphLen, baseCiphId, salt);
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt) {
initKey(domainId, baseCiph, baseCiphLen, baseCiphId, salt, refreshAt, expireAt);
}
void BlobCipherKey::initKey(const EncryptCipherDomainId& domainId,
const uint8_t* baseCiph,
int baseCiphLen,
const EncryptCipherBaseKeyId& baseCiphId,
const EncryptCipherRandomSalt& salt) {
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt) {
// Set the base encryption key properties
baseCipher = std::make_unique<uint8_t[]>(AES_256_KEY_LENGTH);
memset(baseCipher.get(), 0, AES_256_KEY_LENGTH);
@ -82,15 +109,19 @@ void BlobCipherKey::initKey(const EncryptCipherDomainId& domainId,
cipher = std::make_unique<uint8_t[]>(AES_256_KEY_LENGTH);
memset(cipher.get(), 0, AES_256_KEY_LENGTH);
applyHmacSha256Derivation();
// update the key creation time
creationTime = now();
// update cipher 'refresh' and 'expire' TS
refreshAtTS = refreshAt;
expireAtTS = expireAt;
TraceEvent("BlobCipherKey")
#if BLOB_CIPHER_DEBUG
TraceEvent(SevDebug, "BlobCipher.KeyInit")
.detail("DomainId", domainId)
.detail("BaseCipherId", baseCipherId)
.detail("BaseCipherLen", baseCipherLen)
.detail("RandomSalt", randomSalt)
.detail("CreationTime", creationTime);
.detail("RefreshAt", refreshAtTS)
.detail("ExpireAtTS", expireAtTS);
#endif
}
void BlobCipherKey::applyHmacSha256Derivation() {
@ -118,7 +149,7 @@ BlobCipherKeyIdCache::BlobCipherKeyIdCache()
BlobCipherKeyIdCache::BlobCipherKeyIdCache(EncryptCipherDomainId dId)
: domainId(dId), latestBaseCipherKeyId(), latestRandomSalt() {
TraceEvent("Init_BlobCipherKeyIdCache").detail("DomainId", domainId);
TraceEvent(SevInfo, "BlobCipher.KeyIdCacheInit").detail("DomainId", domainId);
}
BlobCipherKeyIdCacheKey BlobCipherKeyIdCache::getCacheKey(const EncryptCipherBaseKeyId& baseCipherKeyId,
@ -151,7 +182,9 @@ Reference<BlobCipherKey> BlobCipherKeyIdCache::getCipherByBaseCipherId(const Enc
Reference<BlobCipherKey> BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen) {
int baseCipherLen,
const int64_t refreshAt,
const int64_t expireAt) {
ASSERT_GT(baseCipherId, ENCRYPT_INVALID_CIPHER_KEY_ID);
// BaseCipherKeys are immutable, given the routine invocation updates 'latestCipher',
@ -159,21 +192,30 @@ Reference<BlobCipherKey> BlobCipherKeyIdCache::insertBaseCipherKey(const Encrypt
Reference<BlobCipherKey> latestCipherKey = getLatestCipherKey();
if (latestCipherKey.isValid() && latestCipherKey->getBaseCipherId() == baseCipherId) {
if (memcmp(latestCipherKey->rawBaseCipher(), baseCipher, baseCipherLen) == 0) {
TraceEvent("InsertBaseCipherKey_AlreadyPresent")
#if BLOB_CIPHER_DEBUG
TraceEvent(SevDebug, "InsertBaseCipherKey_AlreadyPresent")
.detail("BaseCipherKeyId", baseCipherId)
.detail("DomainId", domainId);
#endif
// Key is already present; nothing more to do.
return latestCipherKey;
} else {
TraceEvent("InsertBaseCipherKey_UpdateCipher")
TraceEvent(SevInfo, "BlobCipher.UpdatetBaseCipherKey")
.detail("BaseCipherKeyId", baseCipherId)
.detail("DomainId", domainId);
throw encrypt_update_cipher();
}
}
TraceEvent(SevInfo, "BlobCipherKey.InsertBaseCipherKeyLatest")
.detail("DomainId", domainId)
.detail("BaseCipherId", baseCipherId)
.detail("RefreshAt", refreshAt)
.detail("ExpireAt", expireAt);
Reference<BlobCipherKey> cipherKey =
makeReference<BlobCipherKey>(domainId, baseCipherId, baseCipher, baseCipherLen);
makeReference<BlobCipherKey>(domainId, baseCipherId, baseCipher, baseCipherLen, refreshAt, expireAt);
BlobCipherKeyIdCacheKey cacheKey = getCacheKey(cipherKey->getBaseCipherId(), cipherKey->getSalt());
keyIdCache.emplace(cacheKey, cipherKey);
@ -187,7 +229,9 @@ Reference<BlobCipherKey> BlobCipherKeyIdCache::insertBaseCipherKey(const Encrypt
Reference<BlobCipherKey> BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt) {
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt) {
ASSERT_NE(baseCipherId, ENCRYPT_INVALID_CIPHER_KEY_ID);
ASSERT_NE(salt, ENCRYPT_INVALID_RANDOM_SALT);
@ -197,21 +241,31 @@ Reference<BlobCipherKey> BlobCipherKeyIdCache::insertBaseCipherKey(const Encrypt
BlobCipherKeyIdCacheMapCItr itr = keyIdCache.find(cacheKey);
if (itr != keyIdCache.end()) {
if (memcmp(itr->second->rawBaseCipher(), baseCipher, baseCipherLen) == 0) {
TraceEvent("InsertBaseCipherKey_AlreadyPresent")
#if BLOB_CIPHER_DEBUG
TraceEvent(SevDebug, "InsertBaseCipherKey_AlreadyPresent")
.detail("BaseCipherKeyId", baseCipherId)
.detail("DomainId", domainId);
#endif
// Key is already present; nothing more to do.
return itr->second;
} else {
TraceEvent("InsertBaseCipherKey_UpdateCipher")
TraceEvent(SevInfo, "BlobCipher.UpdateBaseCipherKey")
.detail("BaseCipherKeyId", baseCipherId)
.detail("DomainId", domainId);
throw encrypt_update_cipher();
}
}
TraceEvent(SevInfo, "BlobCipherKey.InsertBaseCipherKey")
.detail("DomainId", domainId)
.detail("BaseCipherId", baseCipherId)
.detail("Salt", salt)
.detail("RefreshAt", refreshAt)
.detail("ExpireAt", expireAt);
Reference<BlobCipherKey> cipherKey =
makeReference<BlobCipherKey>(domainId, baseCipherId, baseCipher, baseCipherLen, salt);
makeReference<BlobCipherKey>(domainId, baseCipherId, baseCipher, baseCipherLen, salt, refreshAt, expireAt);
keyIdCache.emplace(cacheKey, cipherKey);
return cipherKey;
}
@ -237,7 +291,9 @@ std::vector<Reference<BlobCipherKey>> BlobCipherKeyIdCache::getAllCipherKeys() {
Reference<BlobCipherKey> BlobCipherKeyCache::insertCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen) {
int baseCipherLen,
const int64_t refreshAt,
const int64_t expireAt) {
if (domainId == ENCRYPT_INVALID_DOMAIN_ID || baseCipherId == ENCRYPT_INVALID_CIPHER_KEY_ID) {
throw encrypt_invalid_id();
}
@ -248,18 +304,18 @@ Reference<BlobCipherKey> BlobCipherKeyCache::insertCipherKey(const EncryptCipher
// Add mapping to track new encryption domain
Reference<BlobCipherKeyIdCache> keyIdCache = makeReference<BlobCipherKeyIdCache>(domainId);
Reference<BlobCipherKey> cipherKey =
keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen);
keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, refreshAt, expireAt);
domainCacheMap.emplace(domainId, keyIdCache);
return cipherKey;
} else {
// Track new baseCipher keys
Reference<BlobCipherKeyIdCache> keyIdCache = domainItr->second;
return keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen);
return keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, refreshAt, expireAt);
}
TraceEvent("InsertCipherKey").detail("DomainId", domainId).detail("BaseCipherKeyId", baseCipherId);
} catch (Error& e) {
TraceEvent("InsertCipherKey_Failed").detail("BaseCipherKeyId", baseCipherId).detail("DomainId", domainId);
TraceEvent(SevWarn, "BlobCipher.InsertCipherKeyFailed")
.detail("BaseCipherKeyId", baseCipherId)
.detail("DomainId", domainId);
throw;
}
}
@ -268,7 +324,9 @@ Reference<BlobCipherKey> BlobCipherKeyCache::insertCipherKey(const EncryptCipher
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt) {
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt) {
if (domainId == ENCRYPT_INVALID_DOMAIN_ID || baseCipherId == ENCRYPT_INVALID_CIPHER_KEY_ID ||
salt == ENCRYPT_INVALID_RANDOM_SALT) {
throw encrypt_invalid_id();
@ -280,20 +338,17 @@ Reference<BlobCipherKey> BlobCipherKeyCache::insertCipherKey(const EncryptCipher
if (domainItr == domainCacheMap.end()) {
// Add mapping to track new encryption domain
Reference<BlobCipherKeyIdCache> keyIdCache = makeReference<BlobCipherKeyIdCache>(domainId);
cipherKey = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
cipherKey =
keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt, refreshAt, expireAt);
domainCacheMap.emplace(domainId, keyIdCache);
} else {
// Track new baseCipher keys
Reference<BlobCipherKeyIdCache> keyIdCache = domainItr->second;
cipherKey = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
cipherKey =
keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt, refreshAt, expireAt);
}
TraceEvent("InsertCipherKey")
.detail("DomainId", domainId)
.detail("BaseCipherKeyId", baseCipherId)
.detail("Salt", salt);
} catch (Error& e) {
TraceEvent("InsertCipherKey_Failed")
TraceEvent(SevWarn, "BlobCipher.InsertCipherKey_Failed")
.detail("BaseCipherKeyId", baseCipherId)
.detail("DomainId", domainId)
.detail("Salt", salt);
@ -305,21 +360,27 @@ Reference<BlobCipherKey> BlobCipherKeyCache::insertCipherKey(const EncryptCipher
Reference<BlobCipherKey> BlobCipherKeyCache::getLatestCipherKey(const EncryptCipherDomainId& domainId) {
if (domainId == ENCRYPT_INVALID_DOMAIN_ID) {
TraceEvent("GetLatestCipherKey_InvalidID").detail("DomainId", domainId);
TraceEvent(SevWarn, "BlobCipher.GetLatestCipherKeyInvalidID").detail("DomainId", domainId);
throw encrypt_invalid_id();
}
auto domainItr = domainCacheMap.find(domainId);
if (domainItr == domainCacheMap.end()) {
TraceEvent("GetLatestCipherKey_DomainNotFound").detail("DomainId", domainId);
TraceEvent(SevInfo, "BlobCipher.GetLatestCipherKeyDomainNotFound").detail("DomainId", domainId);
return Reference<BlobCipherKey>();
}
Reference<BlobCipherKeyIdCache> keyIdCache = domainItr->second;
Reference<BlobCipherKey> cipherKey = keyIdCache->getLatestCipherKey();
if (cipherKey.isValid() && (now() - cipherKey->getCreationTime()) > FLOW_KNOBS->ENCRYPT_CIPHER_KEY_CACHE_TTL) {
TraceEvent("GetLatestCipherKey_ExpiredTTL")
// Ensure 'freshness' guarantees for the latestCipher
if (cipherKey.isValid() && cipherKey->needsRefresh()) {
#if BLOB_CIPHER_DEBUG
TraceEvent("SevDebug, BlobCipher.GetLatestNeedsRefresh")
.detail("DomainId", domainId)
.detail("BaseCipherId", cipherKey->getBaseCipherId());
.detail("Now", now())
.detail("RefreshAt", cipherKey->getRefreshAtTS());
#endif
return Reference<BlobCipherKey>();
}
@ -335,7 +396,22 @@ Reference<BlobCipherKey> BlobCipherKeyCache::getCipherKey(const EncryptCipherDom
}
Reference<BlobCipherKeyIdCache> keyIdCache = domainItr->second;
return keyIdCache->getCipherByBaseCipherId(baseCipherId, salt);
Reference<BlobCipherKey> cipherKey = keyIdCache->getCipherByBaseCipherId(baseCipherId, salt);
// Ensure 'liveness' guarantees for the cipher
if (cipherKey.isValid() && cipherKey->isExpired()) {
#if BLOB_CIPHER_DEBUG
TraceEvent(SevDebug, "BlobCipher.GetCipherExpired")
.detail("DomainId", domainId)
.detail("BaseCipherId", baseCipherId)
.detail("Now", now())
.detail("ExpireAt", cipherKey->getExpireAtTS());
#endif
return Reference<BlobCipherKey>();
}
return cipherKey;
}
void BlobCipherKeyCache::resetEncryptDomainId(const EncryptCipherDomainId domainId) {
@ -346,15 +422,18 @@ void BlobCipherKeyCache::resetEncryptDomainId(const EncryptCipherDomainId domain
Reference<BlobCipherKeyIdCache> keyIdCache = domainItr->second;
keyIdCache->cleanup();
TraceEvent("ResetEncryptDomainId").detail("DomainId", domainId);
TraceEvent(SevInfo, "BlobCipher.ResetEncryptDomainId").detail("DomainId", domainId);
}
void BlobCipherKeyCache::cleanup() noexcept {
Reference<BlobCipherKeyCache> instance = BlobCipherKeyCache::getInstance();
TraceEvent(SevInfo, "BlobCipherKeyCache.Cleanup").log();
for (auto& domainItr : instance->domainCacheMap) {
Reference<BlobCipherKeyIdCache> keyIdCache = domainItr.second;
keyIdCache->cleanup();
TraceEvent("BlobCipherKeyCache_Cleanup").detail("DomainId", domainItr.first);
TraceEvent(SevInfo, "BlobCipher.KeyCacheCleanup").detail("DomainId", domainItr.first);
}
instance->domainCacheMap.clear();
@ -423,7 +502,7 @@ Reference<EncryptBuf> EncryptBlobCipherAes265Ctr::encrypt(const uint8_t* plainte
uint8_t* ciphertext = encryptBuf->begin();
int bytes{ 0 };
if (EVP_EncryptUpdate(ctx, ciphertext, &bytes, plaintext, plaintextLen) != 1) {
TraceEvent("Encrypt_UpdateFailed")
TraceEvent(SevWarn, "BlobCipher.EncryptUpdateFailed")
.detail("BaseCipherId", textCipherKey->getBaseCipherId())
.detail("EncryptDomainId", textCipherKey->getDomainId());
throw encrypt_ops_error();
@ -431,14 +510,14 @@ Reference<EncryptBuf> EncryptBlobCipherAes265Ctr::encrypt(const uint8_t* plainte
int finalBytes{ 0 };
if (EVP_EncryptFinal_ex(ctx, ciphertext + bytes, &finalBytes) != 1) {
TraceEvent("Encrypt_FinalFailed")
TraceEvent(SevWarn, "BlobCipher.EncryptFinalFailed")
.detail("BaseCipherId", textCipherKey->getBaseCipherId())
.detail("EncryptDomainId", textCipherKey->getDomainId());
throw encrypt_ops_error();
}
if ((bytes + finalBytes) != plaintextLen) {
TraceEvent("Encrypt_UnexpectedCipherLen")
TraceEvent(SevWarn, "BlobCipher.EncryptUnexpectedCipherLen")
.detail("PlaintextLen", plaintextLen)
.detail("EncryptedBufLen", bytes + finalBytes);
throw encrypt_ops_error();
@ -508,20 +587,20 @@ Standalone<StringRef> EncryptBlobCipherAes265Ctr::encryptBlobGranuleChunk(const
int bytes{ 0 };
if (EVP_EncryptUpdate(ctx, ciphertext, &bytes, plaintext, plaintextLen) != 1) {
TraceEvent("Encrypt_UpdateFailed")
TraceEvent(SevWarn, "BlobCipher.EncryptUpdateFailed")
.detail("BaseCipherId", textCipherKey->getBaseCipherId())
.detail("EncryptDomainId", textCipherKey->getDomainId());
throw encrypt_ops_error();
}
int finalBytes{ 0 };
if (EVP_EncryptFinal_ex(ctx, ciphertext + bytes, &finalBytes) != 1) {
TraceEvent("Encrypt_FinalFailed")
TraceEvent(SevWarn, "BlobCipher.EncryptFinalFailed")
.detail("BaseCipherId", textCipherKey->getBaseCipherId())
.detail("EncryptDomainId", textCipherKey->getDomainId());
throw encrypt_ops_error();
}
if ((bytes + finalBytes) != plaintextLen) {
TraceEvent("Encrypt_UnexpectedCipherLen")
TraceEvent(SevWarn, "BlobCipher.EncryptUnexpectedCipherLen")
.detail("PlaintextLen", plaintextLen)
.detail("EncryptedBufLen", bytes + finalBytes);
throw encrypt_ops_error();
@ -573,7 +652,7 @@ void DecryptBlobCipherAes256Ctr::verifyHeaderAuthToken(const BlobCipherEncryptHe
AES_256_KEY_LENGTH,
arena);
if (memcmp(&header.multiAuthTokens.headerAuthToken[0], computedHeaderAuthToken.begin(), AUTH_TOKEN_SIZE) != 0) {
TraceEvent("VerifyEncryptBlobHeader_AuthTokenMismatch")
TraceEvent(SevWarn, "BlobCipher.VerifyEncryptBlobHeaderAuthTokenMismatch")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderMode", header.flags.encryptMode)
.detail("MultiAuthHeaderAuthToken",
@ -603,7 +682,7 @@ void DecryptBlobCipherAes256Ctr::verifyHeaderSingleAuthToken(const uint8_t* ciph
StringRef computed = computeAuthToken(
buff, ciphertextLen + sizeof(BlobCipherEncryptHeader), headerCipherKey->rawCipher(), AES_256_KEY_LENGTH, arena);
if (memcmp(&header.singleAuthToken.authToken[0], computed.begin(), AUTH_TOKEN_SIZE) != 0) {
TraceEvent("VerifyEncryptBlobHeader_AuthTokenMismatch")
TraceEvent(SevWarn, "BlobCipher.VerifyEncryptBlobHeaderAuthTokenMismatch")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderMode", header.flags.encryptMode)
.detail("SingleAuthToken",
@ -629,7 +708,7 @@ void DecryptBlobCipherAes256Ctr::verifyHeaderMultiAuthToken(const uint8_t* ciphe
arena);
if (memcmp(&header.multiAuthTokens.cipherTextAuthToken[0], computedCipherTextAuthToken.begin(), AUTH_TOKEN_SIZE) !=
0) {
TraceEvent("VerifyEncryptBlobHeader_AuthTokenMismatch")
TraceEvent(SevWarn, "BlobCipher.VerifyEncryptBlobHeaderAuthTokenMismatch")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderMode", header.flags.encryptMode)
.detail("MultiAuthCipherTextAuthToken",
@ -659,7 +738,7 @@ void DecryptBlobCipherAes256Ctr::verifyEncryptHeaderMetadata(const BlobCipherEnc
if (header.flags.headerVersion != EncryptBlobCipherAes265Ctr::ENCRYPT_HEADER_VERSION ||
header.flags.encryptMode != ENCRYPT_CIPHER_MODE_AES_256_CTR ||
!isEncryptHeaderAuthTokenModeValid((EncryptAuthTokenMode)header.flags.authTokenMode)) {
TraceEvent("VerifyEncryptBlobHeader")
TraceEvent(SevWarn, "BlobCipher.VerifyEncryptBlobHeader")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("ExpectedVersion", EncryptBlobCipherAes265Ctr::ENCRYPT_HEADER_VERSION)
.detail("EncryptCipherMode", header.flags.encryptMode)
@ -678,7 +757,8 @@ Reference<EncryptBuf> DecryptBlobCipherAes256Ctr::decrypt(const uint8_t* ciphert
verifyEncryptHeaderMetadata(header);
if (header.flags.authTokenMode != ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE && !headerCipherKey.isValid()) {
TraceEvent("Decrypt_InvalidHeaderCipherKey").detail("AuthTokenMode", header.flags.authTokenMode);
TraceEvent(SevWarn, "BlobCipher.DecryptInvalidHeaderCipherKey")
.detail("AuthTokenMode", header.flags.authTokenMode);
throw encrypt_ops_error();
}
@ -695,7 +775,7 @@ Reference<EncryptBuf> DecryptBlobCipherAes256Ctr::decrypt(const uint8_t* ciphert
uint8_t* plaintext = decrypted->begin();
int bytesDecrypted{ 0 };
if (!EVP_DecryptUpdate(ctx, plaintext, &bytesDecrypted, ciphertext, ciphertextLen)) {
TraceEvent("Decrypt_UpdateFailed")
TraceEvent(SevWarn, "BlobCipher.DecryptUpdateFailed")
.detail("BaseCipherId", header.cipherTextDetails.baseCipherId)
.detail("EncryptDomainId", header.cipherTextDetails.encryptDomainId);
throw encrypt_ops_error();
@ -703,14 +783,14 @@ Reference<EncryptBuf> DecryptBlobCipherAes256Ctr::decrypt(const uint8_t* ciphert
int finalBlobBytes{ 0 };
if (EVP_DecryptFinal_ex(ctx, plaintext + bytesDecrypted, &finalBlobBytes) <= 0) {
TraceEvent("Decrypt_FinalFailed")
TraceEvent(SevWarn, "BlobCipher.DecryptFinalFailed")
.detail("BaseCipherId", header.cipherTextDetails.baseCipherId)
.detail("EncryptDomainId", header.cipherTextDetails.encryptDomainId);
throw encrypt_ops_error();
}
if ((bytesDecrypted + finalBlobBytes) != ciphertextLen) {
TraceEvent("Encrypt_UnexpectedPlaintextLen")
TraceEvent(SevWarn, "BlobCipher.EncryptUnexpectedPlaintextLen")
.detail("CiphertextLen", ciphertextLen)
.detail("DecryptedBufLen", bytesDecrypted + finalBlobBytes);
throw encrypt_ops_error();
@ -760,6 +840,7 @@ StringRef computeAuthToken(const uint8_t* payload,
const uint8_t* key,
const int keyLen,
Arena& arena) {
CODE_PROBE(true, "Auth token generation");
HmacSha256DigestGen hmacGenerator(key, keyLen);
StringRef digest = hmacGenerator.digest(payload, payloadLen, arena);
@ -782,7 +863,7 @@ void forceLinkBlobCipherTests() {}
// 6.1 cleanup cipherKeys by given encryptDomainId
// 6.2. Cleanup all cached cipherKeys
TEST_CASE("flow/BlobCipher") {
TraceEvent("BlobCipherTest_Start").log();
TraceEvent("BlobCipherTest.Start").log();
// Construct a dummy External Key Manager representation and populate with some keys
class BaseCipher : public ReferenceCounted<BaseCipher>, NonCopyable {
@ -791,11 +872,16 @@ TEST_CASE("flow/BlobCipher") {
int len;
EncryptCipherBaseKeyId keyId;
std::unique_ptr<uint8_t[]> key;
int64_t refreshAt;
int64_t expireAt;
EncryptCipherRandomSalt generatedSalt;
BaseCipher(const EncryptCipherDomainId& dId, const EncryptCipherBaseKeyId& kId)
BaseCipher(const EncryptCipherDomainId& dId,
const EncryptCipherBaseKeyId& kId,
const int64_t rAt,
const int64_t eAt)
: domainId(dId), len(deterministicRandom()->randomInt(AES_256_KEY_LENGTH / 2, AES_256_KEY_LENGTH + 1)),
keyId(kId), key(std::make_unique<uint8_t[]>(len)) {
keyId(kId), key(std::make_unique<uint8_t[]>(len)), refreshAt(rAt), expireAt(eAt) {
deterministicRandom()->randomBytes(key.get(), len);
}
};
@ -810,7 +896,10 @@ TEST_CASE("flow/BlobCipher") {
deterministicRandom()->randomInt(minBaseCipherKeyId, minBaseCipherKeyId + 50) + 15;
for (int dId = minDomainId; dId <= maxDomainId; dId++) {
for (int kId = minBaseCipherKeyId; kId <= maxBaseCipherKeyId; kId++) {
domainKeyMap[dId].emplace(kId, makeReference<BaseCipher>(dId, kId));
domainKeyMap[dId].emplace(
kId,
makeReference<BaseCipher>(
dId, kId, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()));
}
}
ASSERT_EQ(domainKeyMap.size(), maxDomainId);
@ -818,7 +907,7 @@ TEST_CASE("flow/BlobCipher") {
Reference<BlobCipherKeyCache> cipherKeyCache = BlobCipherKeyCache::getInstance();
// validate getLatestCipherKey return empty when there's no cipher key
TraceEvent("BlobCipherTest_LatestKeyNotExists").log();
TraceEvent("BlobCipherTest.LatestKeyNotExists").log();
Reference<BlobCipherKey> latestKeyNonexists =
cipherKeyCache->getLatestCipherKey(deterministicRandom()->randomInt(minDomainId, maxDomainId));
ASSERT(!latestKeyNonexists.isValid());
@ -835,18 +924,27 @@ TEST_CASE("flow/BlobCipher") {
for (auto& baseKeyItr : domainItr.second) {
Reference<BaseCipher> baseCipher = baseKeyItr.second;
cipherKeyCache->insertCipherKey(
baseCipher->domainId, baseCipher->keyId, baseCipher->key.get(), baseCipher->len);
cipherKeyCache->insertCipherKey(baseCipher->domainId,
baseCipher->keyId,
baseCipher->key.get(),
baseCipher->len,
baseCipher->refreshAt,
baseCipher->expireAt);
Reference<BlobCipherKey> fetchedKey = cipherKeyCache->getLatestCipherKey(baseCipher->domainId);
baseCipher->generatedSalt = fetchedKey->getSalt();
}
}
// insert EncryptHeader BlobCipher key
Reference<BaseCipher> headerBaseCipher = makeReference<BaseCipher>(ENCRYPT_HEADER_DOMAIN_ID, 1);
cipherKeyCache->insertCipherKey(
headerBaseCipher->domainId, headerBaseCipher->keyId, headerBaseCipher->key.get(), headerBaseCipher->len);
Reference<BaseCipher> headerBaseCipher = makeReference<BaseCipher>(
ENCRYPT_HEADER_DOMAIN_ID, 1, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max());
cipherKeyCache->insertCipherKey(headerBaseCipher->domainId,
headerBaseCipher->keyId,
headerBaseCipher->key.get(),
headerBaseCipher->len,
headerBaseCipher->refreshAt,
headerBaseCipher->expireAt);
TraceEvent("BlobCipherTest_InsertKeysDone").log();
TraceEvent("BlobCipherTest.InsertKeysDone").log();
// validate the cipherKey lookups work as desired
for (auto& domainItr : domainKeyMap) {
@ -865,17 +963,21 @@ TEST_CASE("flow/BlobCipher") {
ASSERT_NE(std::memcmp(cipherKey->rawCipher(), baseCipher->key.get(), cipherKey->getBaseCipherLen()), 0);
}
}
TraceEvent("BlobCipherTest_LooksupDone").log();
TraceEvent("BlobCipherTest.LooksupDone").log();
// Ensure attempting to insert an existing cipherKey (identical) more than once is treated as a NOP
try {
Reference<BaseCipher> baseCipher = domainKeyMap[minDomainId][minBaseCipherKeyId];
cipherKeyCache->insertCipherKey(
baseCipher->domainId, baseCipher->keyId, baseCipher->key.get(), baseCipher->len);
cipherKeyCache->insertCipherKey(baseCipher->domainId,
baseCipher->keyId,
baseCipher->key.get(),
baseCipher->len,
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
} catch (Error& e) {
throw;
}
TraceEvent("BlobCipherTest_ReinsertIdempotentKeyDone").log();
TraceEvent("BlobCipherTest.ReinsertIdempotentKeyDone").log();
// Ensure attempting to insert an existing cipherKey (modified) fails with appropriate error
try {
@ -886,13 +988,18 @@ TEST_CASE("flow/BlobCipher") {
for (int i = 2; i < 5; i++) {
rawCipher[i]++;
}
cipherKeyCache->insertCipherKey(baseCipher->domainId, baseCipher->keyId, &rawCipher[0], baseCipher->len);
cipherKeyCache->insertCipherKey(baseCipher->domainId,
baseCipher->keyId,
&rawCipher[0],
baseCipher->len,
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int64_t>::max());
} catch (Error& e) {
if (e.code() != error_code_encrypt_update_cipher) {
throw;
}
}
TraceEvent("BlobCipherTest_ReinsertNonIdempotentKeyDone").log();
TraceEvent("BlobCipherTest.ReinsertNonIdempotentKeyDone").log();
// Validate Encryption ops
Reference<BlobCipherKey> cipherKey = cipherKeyCache->getLatestCipherKey(minDomainId);
@ -908,7 +1015,7 @@ TEST_CASE("flow/BlobCipher") {
BlobCipherEncryptHeader headerCopy;
// validate basic encrypt followed by decrypt operation for AUTH_MODE_NONE
{
TraceEvent("NoneAuthMode_Start").log();
TraceEvent("NoneAuthMode.Start").log();
EncryptBlobCipherAes265Ctr encryptor(
cipherKey, Reference<BlobCipherKey>(), iv, AES_256_IV_LENGTH, ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE);
@ -921,7 +1028,7 @@ TEST_CASE("flow/BlobCipher") {
ASSERT_EQ(header.flags.encryptMode, ENCRYPT_CIPHER_MODE_AES_256_CTR);
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE);
TraceEvent("BlobCipherTest_EncryptDone")
TraceEvent("BlobCipherTest.EncryptDone")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderEncryptMode", header.flags.encryptMode)
.detail("DomainId", header.cipherTextDetails.encryptDomainId)
@ -937,7 +1044,7 @@ TEST_CASE("flow/BlobCipher") {
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
ASSERT_EQ(memcmp(decrypted->begin(), &orgData[0], bufLen), 0);
TraceEvent("BlobCipherTest_DecryptDone").log();
TraceEvent("BlobCipherTest.DecryptDone").log();
// induce encryption header corruption - headerVersion corrupted
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
@ -985,12 +1092,12 @@ TEST_CASE("flow/BlobCipher") {
ASSERT(false);
}
TraceEvent("NoneAuthMode_Done").log();
TraceEvent("NoneAuthMode.Done").log();
}
// validate basic encrypt followed by decrypt operation for AUTH_TOKEN_MODE_SINGLE
{
TraceEvent("SingleAuthMode_Start").log();
TraceEvent("SingleAuthMode.Start").log();
EncryptBlobCipherAes265Ctr encryptor(
cipherKey, headerCipherKey, iv, AES_256_IV_LENGTH, ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
@ -1003,7 +1110,7 @@ TEST_CASE("flow/BlobCipher") {
ASSERT_EQ(header.flags.encryptMode, ENCRYPT_CIPHER_MODE_AES_256_CTR);
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
TraceEvent("BlobCipherTest_EncryptDone")
TraceEvent("BlobCipherTest.EncryptDone")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderEncryptMode", header.flags.encryptMode)
.detail("DomainId", header.cipherTextDetails.encryptDomainId)
@ -1024,7 +1131,7 @@ TEST_CASE("flow/BlobCipher") {
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
ASSERT_EQ(memcmp(decrypted->begin(), &orgData[0], bufLen), 0);
TraceEvent("BlobCipherTest_DecryptDone").log();
TraceEvent("BlobCipherTest.DecryptDone").log();
// induce encryption header corruption - headerVersion corrupted
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
@ -1090,12 +1197,12 @@ TEST_CASE("flow/BlobCipher") {
}
}
TraceEvent("SingleAuthMode_Done").log();
TraceEvent("SingleAuthMode.Done").log();
}
// validate basic encrypt followed by decrypt operation for AUTH_TOKEN_MODE_MULTI
{
TraceEvent("MultiAuthMode_Start").log();
TraceEvent("MultiAuthMode.Start").log();
EncryptBlobCipherAes265Ctr encryptor(
cipherKey, headerCipherKey, iv, AES_256_IV_LENGTH, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
@ -1108,7 +1215,7 @@ TEST_CASE("flow/BlobCipher") {
ASSERT_EQ(header.flags.encryptMode, ENCRYPT_CIPHER_MODE_AES_256_CTR);
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
TraceEvent("BlobCipherTest_EncryptDone")
TraceEvent("BlobCipherTest.EncryptDone")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderEncryptMode", header.flags.encryptMode)
.detail("DomainId", header.cipherTextDetails.encryptDomainId)
@ -1130,7 +1237,7 @@ TEST_CASE("flow/BlobCipher") {
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
ASSERT_EQ(memcmp(decrypted->begin(), &orgData[0], bufLen), 0);
TraceEvent("BlobCipherTest_DecryptDone").log();
TraceEvent("BlobCipherTest.DecryptDone").log();
// induce encryption header corruption - headerVersion corrupted
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
@ -1212,7 +1319,7 @@ TEST_CASE("flow/BlobCipher") {
}
}
TraceEvent("MultiAuthMode_Done").log();
TraceEvent("MultiAuthMode.Done").log();
}
// Validate dropping encryptDomainId cached keys
@ -1228,6 +1335,6 @@ TEST_CASE("flow/BlobCipher") {
ASSERT(cachedKeys.empty());
}
TraceEvent("BlobCipherTest_Done").log();
TraceEvent("BlobCipherTest.Done").log();
return Void();
}
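To make the new insertCipherKey signature concrete, a minimal sketch (not part of the test above); the domain/key ids are hypothetical and the knob-derived deadlines are an assumption:

    Reference<BlobCipherKeyCache> cache = BlobCipherKeyCache::getInstance();
    uint8_t rawKey[AES_256_KEY_LENGTH];
    deterministicRandom()->randomBytes(&rawKey[0], AES_256_KEY_LENGTH);
    // hypothetical deadlines derived from the encryption knobs
    int64_t refreshAt = now() + FLOW_KNOBS->ENCRYPT_KEY_REFRESH_INTERVAL;
    int64_t expireAt = now() + FLOW_KNOBS->ENCRYPT_CIPHER_KEY_CACHE_TTL;
    cache->insertCipherKey(/*domainId*/ 1, /*baseCipherId*/ 100, &rawKey[0], AES_256_KEY_LENGTH, refreshAt, expireAt);
    Reference<BlobCipherKey> latest = cache->getLatestCipherKey(/*domainId*/ 1);
    ASSERT(latest.isValid() && !latest->needsRefresh() && !latest->isExpired());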

View File

@ -283,8 +283,8 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) {
init( HEALTH_MONITOR_CONNECTION_MAX_CLOSED, 5 );
// Encryption
init( ENCRYPT_CIPHER_KEY_CACHE_TTL, isSimulated ? 120 : 10 * 60 );
if ( randomize && BUGGIFY) { ENCRYPT_CIPHER_KEY_CACHE_TTL = deterministicRandom()->randomInt(50, 100); }
init( ENCRYPT_CIPHER_KEY_CACHE_TTL, isSimulated ? 5 * 60 : 10 * 60 );
if ( randomize && BUGGIFY) { ENCRYPT_CIPHER_KEY_CACHE_TTL = deterministicRandom()->randomInt(2, 10) * 60; }
init( ENCRYPT_KEY_REFRESH_INTERVAL, isSimulated ? 60 : 8 * 60 );
if ( randomize && BUGGIFY) { ENCRYPT_KEY_REFRESH_INTERVAL = deterministicRandom()->randomInt(2, 10); }
init( TOKEN_CACHE_SIZE, 100 );
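For reference, a quick sketch of the ranges these BUGGIFY lines can produce, assuming deterministicRandom()->randomInt(a, b) draws from [a, b) as elsewhere in flow:

    // ENCRYPT_CIPHER_KEY_CACHE_TTL: randomInt(2, 10) * 60 -> 120..540 seconds (2-9 minutes)
    // ENCRYPT_KEY_REFRESH_INTERVAL: randomInt(2, 10)      -> 2..9 seconds
    int64_t ttl = deterministicRandom()->randomInt(2, 10) * 60;
    ASSERT(ttl >= 2 * 60 && ttl <= 9 * 60);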

View File

@ -2128,7 +2128,7 @@ struct TestGVR {
};
template <class F>
void startThreadF(F&& func) {
THREAD_HANDLE startThreadF(F&& func) {
struct Thing {
F f;
Thing(F&& f) : f(std::move(f)) {}
@ -2140,7 +2140,7 @@ void startThreadF(F&& func) {
}
};
Thing* t = new Thing(std::move(func));
g_network->startThread(Thing::start, t);
return g_network->startThread(Thing::start, t);
}
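A short sketch of why the signature changed: returning the THREAD_HANDLE lets callers join the spawned thread via waitThread, as the reworked tests below do, before inspecting state the thread wrote. The worker body here is hypothetical:

    THREAD_HANDLE handle = startThreadF([]() { printf("worker running\n"); });
    waitThread(handle); // join before reading anything the worker produced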
TEST_CASE("/flow/Net2/ThreadSafeQueue/Interface") {
@ -2168,6 +2168,7 @@ TEST_CASE("/flow/Net2/ThreadSafeQueue/Interface") {
struct QueueTestThreadState {
QueueTestThreadState(int threadId, int toProduce) : threadId(threadId), toProduce(toProduce) {}
int threadId;
THREAD_HANDLE handle;
int toProduce;
int produced = 0;
Promise<Void> doneProducing;
@ -2186,6 +2187,8 @@ struct QueueTestThreadState {
TEST_CASE("/flow/Net2/ThreadSafeQueue/Threaded") {
// Uses ThreadSafeQueue from multiple threads. Verifies that all pushed elements are popped, maintaining the
// ordering within a thread.
noUnseed = true; // multi-threading inherently non-deterministic
ThreadSafeQueue<int> queue;
state std::vector<QueueTestThreadState> perThread = { QueueTestThreadState(0, 1000000),
QueueTestThreadState(1, 100000),
@ -2197,7 +2200,7 @@ TEST_CASE("/flow/Net2/ThreadSafeQueue/Threaded") {
auto& s = perThread[t];
doneProducing.push_back(s.doneProducing.getFuture());
total += s.toProduce;
startThreadF([&queue, &s]() {
s.handle = startThreadF([&queue, &s]() {
printf("Thread%d\n", s.threadId);
int nextYield = 0;
while (s.produced < s.toProduce) {
@ -2228,7 +2231,14 @@ TEST_CASE("/flow/Net2/ThreadSafeQueue/Threaded") {
wait(waitForAll(doneProducing));
for (int t = 0; t < std::size(perThread); ++t) {
// Make sure we continue on the main thread.
Promise<Void> signal;
state Future<Void> doneConsuming = signal.getFuture();
g_network->onMainThread(std::move(signal), TaskPriority::DefaultOnMainThread);
wait(doneConsuming);
for (int t = 0; t < perThread.size(); ++t) {
waitThread(perThread[t].handle);
perThread[t].checkDone();
}
return Void();
@ -2238,6 +2248,7 @@ TEST_CASE("/flow/Net2/ThreadSafeQueue/Threaded") {
// satisfy this requirement yet.
TEST_CASE("noSim/flow/Net2/onMainThreadFIFO") {
// Verifies that signals processed by onMainThread() are executed in order.
noUnseed = true; // multi-threading inherently non-deterministic
state std::vector<QueueTestThreadState> perThread = { QueueTestThreadState(0, 1000000),
QueueTestThreadState(1, 100000),
@ -2246,7 +2257,7 @@ TEST_CASE("noSim/flow/Net2/onMainThreadFIFO") {
for (int t = 0; t < perThread.size(); ++t) {
auto& s = perThread[t];
doneProducing.push_back(s.doneProducing.getFuture());
startThreadF([&s]() {
s.handle = startThreadF([&s]() {
int nextYield = 0;
while (s.produced < s.toProduce) {
if (nextYield-- == 0) {
@ -2267,7 +2278,8 @@ TEST_CASE("noSim/flow/Net2/onMainThreadFIFO") {
g_network->onMainThread(std::move(signal), TaskPriority::DefaultOnMainThread);
wait(doneConsuming);
for (int t = 0; t < std::size(perThread); ++t) {
for (int t = 0; t < perThread.size(); ++t) {
waitThread(perThread[t].handle);
perThread[t].checkDone();
}
return Void();

View File

@ -19,8 +19,6 @@
*/
#ifndef FLOW_BLOB_CIPHER_H
#define FLOW_BLOB_CIPHER_H
#include "flow/ProtocolVersion.h"
#include "flow/serialize.h"
#pragma once
#include "flow/Arena.h"
@ -28,10 +26,14 @@
#include "flow/FastRef.h"
#include "flow/flow.h"
#include "flow/genericactors.actor.h"
#include "flow/Knobs.h"
#include "flow/network.h"
#include "flow/ProtocolVersion.h"
#include "flow/serialize.h"
#include <boost/functional/hash.hpp>
#include <cinttypes>
#include <limits>
#include <memory>
#include <openssl/aes.h>
#include <openssl/engine.h>
@ -216,15 +218,20 @@ public:
BlobCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCiphId,
const uint8_t* baseCiph,
int baseCiphLen);
int baseCiphLen,
const int64_t refreshAt,
int64_t expireAt);
BlobCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCiphId,
const uint8_t* baseCiph,
int baseCiphLen,
const EncryptCipherRandomSalt& salt);
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt);
uint8_t* data() const { return cipher.get(); }
uint64_t getCreationTime() const { return creationTime; }
uint64_t getRefreshAtTS() const { return refreshAtTS; }
uint64_t getExpireAtTS() const { return expireAtTS; }
EncryptCipherDomainId getDomainId() const { return encryptDomainId; }
EncryptCipherRandomSalt getSalt() const { return randomSalt; }
EncryptCipherBaseKeyId getBaseCipherId() const { return baseCipherId; }
@ -243,6 +250,20 @@ public:
randomSalt == details.salt;
}
inline bool needsRefresh() {
if (refreshAtTS == std::numeric_limits<int64_t>::max()) {
return false;
}
return now() >= refreshAtTS;
}
inline bool isExpired() {
if (expireAtTS == std::numeric_limits<int64_t>::max()) {
return false;
}
return now() >= expireAtTS;
}
void reset();
private:
@ -254,16 +275,20 @@ private:
EncryptCipherBaseKeyId baseCipherId;
// Random salt used for encryption cipher key derivation
EncryptCipherRandomSalt randomSalt;
// Creation timestamp for the derived encryption cipher key
uint64_t creationTime;
// Derived encryption cipher key
std::unique_ptr<uint8_t[]> cipher;
// Timestamp at which the cipherKey should be refreshed
int64_t refreshAtTS;
// Timestamp until which the cipherKey remains valid
int64_t expireAtTS;
void initKey(const EncryptCipherDomainId& domainId,
const uint8_t* baseCiph,
int baseCiphLen,
const EncryptCipherBaseKeyId& baseCiphId,
const EncryptCipherRandomSalt& salt);
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt);
void applyHmacSha256Derivation();
};
@ -326,7 +351,9 @@ public:
Reference<BlobCipherKey> insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen);
int baseCipherLen,
const int64_t refreshAt,
const int64_t expireAt);
// API enables inserting base encryption cipher details to the BlobCipherKeyIdCache
// Given cipherKeys are immutable, attempting to re-insert same 'identical' cipherKey
@ -341,7 +368,9 @@ public:
Reference<BlobCipherKey> insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt);
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt);
// API cleanup the cache by dropping all cached cipherKeys
void cleanup();
@ -377,7 +406,9 @@ public:
Reference<BlobCipherKey> insertCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen);
int baseCipherLen,
const int64_t refreshAt,
const int64_t expireAt);
// Enable clients to insert base encryption cipher details to the BlobCipherKeyCache.
// The cipherKeys are indexed using 'baseCipherId', given cipherKeys are immutable,
@ -394,7 +425,9 @@ public:
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt);
const EncryptCipherRandomSalt& salt,
const int64_t refreshAt,
const int64_t expireAt);
// API returns the last inserted cipherKey for a given encryption domain Id.
// If domain Id is invalid, it would throw 'encrypt_invalid_id' exception,
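A small sketch of the sentinel convention behind needsRefresh()/isExpired(): std::numeric_limits<int64_t>::max() means "never", which is what the unit test's BaseCipher instances pass. The helper below is hypothetical and only restates the pattern:

    // hypothetical restatement of the deadline check shared by both accessors
    inline bool pastDeadline(int64_t deadlineTS) {
        if (deadlineTS == std::numeric_limits<int64_t>::max()) {
            return false; // sentinel: deadline disabled
        }
        return now() >= deadlineTS;
    }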

View File

@ -1,3 +1,6 @@
[[knobs]]
enable_encryption = false
[[test]]
testTitle = 'EncryptDecrypt'