Merge pull request #3394 from dongxinEric/feature/range-split-points-based-on-size

Feature/range split points based on size
Xin Dong 2020-10-08 14:25:11 -07:00 committed by GitHub
commit 5d902f9177
37 changed files with 806 additions and 12 deletions

View File

@ -157,7 +157,7 @@ class ApiTest(Test):
read_conflicts = ['READ_CONFLICT_RANGE', 'READ_CONFLICT_KEY']
write_conflicts = ['WRITE_CONFLICT_RANGE', 'WRITE_CONFLICT_KEY', 'DISABLE_WRITE_CONFLICT']
txn_sizes = ['GET_APPROXIMATE_SIZE']
storage_metrics = ['GET_ESTIMATED_RANGE_SIZE']
storage_metrics = ['GET_ESTIMATED_RANGE_SIZE', 'GET_RANGE_SPLIT_POINTS']
op_choices += reads
op_choices += mutations
@ -553,6 +553,23 @@ class ApiTest(Test):
instructions.push_args(key1, key2)
instructions.append(op)
self.add_strings(1)
elif op == 'GET_RANGE_SPLIT_POINTS':
# Protect against inverted range and identical keys
key1 = self.workspace.pack(self.random.random_tuple(1))
key2 = self.workspace.pack(self.random.random_tuple(1))
while key1 == key2:
key1 = self.workspace.pack(self.random.random_tuple(1))
key2 = self.workspace.pack(self.random.random_tuple(1))
if key1 > key2:
key1, key2 = key2, key1
# TODO: randomize chunkSize, but it should not exceed 100M (the shard size limit)
chunkSize = 10000000 # 10M
instructions.push_args(key1, key2, chunkSize)
instructions.append(op)
self.add_strings(1)
else:
assert False, 'Unknown operation: ' + op

View File

@ -281,6 +281,17 @@ fdb_error_t fdb_future_get_string_array(
);
}
extern "C" DLLEXPORT
fdb_error_t fdb_future_get_key_array(
FDBFuture* f, FDBKey const** out_key_array, int* out_count)
{
CATCH_AND_RETURN(
Standalone<VectorRef<KeyRef>> na = TSAV(Standalone<VectorRef<KeyRef>>, f)->get();
*out_key_array = (FDBKey*) na.begin();
*out_count = na.size();
);
}
FDBFuture* fdb_create_cluster_v609( const char* cluster_file_path ) {
char *path;
if(cluster_file_path) {
@ -626,13 +637,20 @@ fdb_error_t fdb_transaction_add_conflict_range( FDBTransaction*tr, uint8_t const
}
extern "C" DLLEXPORT
extern "C" DLLEXPORT
FDBFuture* fdb_transaction_get_estimated_range_size_bytes( FDBTransaction* tr, uint8_t const* begin_key_name,
int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length ) {
KeyRangeRef range(KeyRef(begin_key_name, begin_key_name_length), KeyRef(end_key_name, end_key_name_length));
return (FDBFuture*)(TXN(tr)->getEstimatedRangeSizeBytes(range).extractPtr());
}
extern "C" DLLEXPORT
FDBFuture* fdb_transaction_get_range_split_points( FDBTransaction* tr, uint8_t const* begin_key_name,
int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length, int64_t chunkSize) {
KeyRangeRef range(KeyRef(begin_key_name, begin_key_name_length), KeyRef(end_key_name, end_key_name_length));
return (FDBFuture*)(TXN(tr)->getRangeSplitPoints(range, chunkSize).extractPtr());
}
#include "fdb_c_function_pointers.g.h"
#define FDB_API_CHANGED(func, ver) if (header_version < ver) fdb_api_ptr_##func = (void*)&(func##_v##ver##_PREV); else if (fdb_api_ptr_##func == (void*)&fdb_api_ptr_unimpl) fdb_api_ptr_##func = (void*)&(func##_impl);

View File

@ -91,6 +91,10 @@ extern "C" {
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_add_network_thread_completion_hook(void (*hook)(void*), void *hook_parameter);
#pragma pack(push, 4)
typedef struct key {
const uint8_t* key;
int key_length;
} FDBKey;
#if FDB_API_VERSION >= 700
typedef struct keyvalue {
const uint8_t* key;
@ -146,6 +150,9 @@ extern "C" {
fdb_future_get_keyvalue_array( FDBFuture* f, FDBKeyValue const** out_kv,
int* out_count, fdb_bool_t* out_more );
#endif
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
fdb_future_get_key_array( FDBFuture* f, FDBKey const** out_key_array,
int* out_count);
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_string_array(FDBFuture* f,
const char*** out_strings, int* out_count);
@ -260,6 +267,10 @@ extern "C" {
fdb_transaction_get_estimated_range_size_bytes( FDBTransaction* tr, uint8_t const* begin_key_name,
int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
fdb_transaction_get_range_split_points( FDBTransaction* tr, uint8_t const* begin_key_name,
int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length, int64_t chunkSize);
#define FDB_KEYSEL_LAST_LESS_THAN(k, l) k, l, 0, 0
#define FDB_KEYSEL_LAST_LESS_OR_EQUAL(k, l) k, l, 1, 0
#define FDB_KEYSEL_FIRST_GREATER_THAN(k, l) k, l, 1, 1

View File

@ -134,6 +134,7 @@ namespace FDB {
FDBStreamingMode streamingMode = FDB_STREAMING_MODE_SERIAL) override;
Future<int64_t> getEstimatedRangeSizeBytes(const KeyRange& keys) override;
Future<FDBStandalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) override;
void addReadConflictRange(KeyRangeRef const& keys) override;
void addReadConflictKey(KeyRef const& key) override;
@ -356,6 +357,16 @@ namespace FDB {
});
}
Future<FDBStandalone<VectorRef<KeyRef>>> TransactionImpl::getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) {
return backToFuture<FDBStandalone<VectorRef<KeyRef>>>(fdb_transaction_get_range_split_points(tr, range.begin.begin(), range.begin.size(), range.end.begin(), range.end.size(), chunkSize), [](Reference<CFuture> f) {
FDBKey const* ks;
int count;
throw_on_error(fdb_future_get_key_array(f->f, &ks, &count));
return FDBStandalone<VectorRef<KeyRef>>(f, VectorRef<KeyRef>((KeyRef*)ks, count));
});
}
void TransactionImpl::addReadConflictRange(KeyRangeRef const& keys) {
throw_on_error( fdb_transaction_add_conflict_range( tr, keys.begin.begin(), keys.begin.size(), keys.end.begin(), keys.end.size(), FDB_CONFLICT_RANGE_TYPE_READ ) );
}

View File

@ -90,6 +90,7 @@ namespace FDB {
}
virtual Future<int64_t> getEstimatedRangeSizeBytes(const KeyRange& keys) = 0;
virtual Future<FDBStandalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) = 0;
virtual void addReadConflictRange(KeyRangeRef const& keys) = 0;
virtual void addReadConflictKey(KeyRef const& key) = 0;

View File

@ -661,6 +661,33 @@ struct GetEstimatedRangeSize : InstructionFunc {
const char* GetEstimatedRangeSize::name = "GET_ESTIMATED_RANGE_SIZE";
REGISTER_INSTRUCTION_FUNC(GetEstimatedRangeSize);
struct GetRangeSplitPoints : InstructionFunc {
static const char* name;
ACTOR static Future<Void> call(Reference<FlowTesterData> data, Reference<InstructionData> instruction) {
state std::vector<StackItem> items = data->stack.pop(3);
if (items.size() != 3)
return Void();
Standalone<StringRef> s1 = wait(items[0].value);
state Standalone<StringRef> beginKey = Tuple::unpack(s1).getString(0);
Standalone<StringRef> s2 = wait(items[1].value);
state Standalone<StringRef> endKey = Tuple::unpack(s2).getString(0);
Standalone<StringRef> s3 = wait(items[2].value);
state int64_t chunkSize = Tuple::unpack(s3).getInt(0);
Future<FDBStandalone<VectorRef<KeyRef>>> fsplitPoints = instruction->tr->getRangeSplitPoints(KeyRangeRef(beginKey, endKey), chunkSize);
FDBStandalone<VectorRef<KeyRef>> splitPoints = wait(fsplitPoints);
data->stack.pushTuple(LiteralStringRef("GOT_RANGE_SPLIT_POINTS"));
return Void();
}
};
const char* GetRangeSplitPoints::name = "GET_RANGE_SPLIT_POINTS";
REGISTER_INSTRUCTION_FUNC(GetRangeSplitPoints);
struct GetKeyFunc : InstructionFunc {
static const char* name;

View File

@ -579,6 +579,17 @@ func (sm *StackMachine) processInst(idx int, inst tuple.Tuple) {
if e != nil {
panic(e)
}
case op == "GET_RANGE_SPLIT_POINTS":
r := sm.popKeyRange()
chunkSize := sm.waitAndPop().item.(int64)
_, e := rt.ReadTransact(func(rtr fdb.ReadTransaction) (interface{}, error) {
_ = rtr.GetRangeSplitPoints(r, chunkSize).MustGet()
sm.store(idx, []byte("GOT_RANGE_SPLIT_POINTS"))
return nil, nil
})
if e != nil {
panic(e)
}
case op == "COMMIT":
sm.store(idx, sm.currentTransaction().Commit())
case op == "RESET":

View File

@ -306,6 +306,57 @@ func (f *futureKeyValueArray) Get() ([]KeyValue, bool, error) {
return ret, (more != 0), nil
}
// FutureKeyArray represents the asynchronous result of a function
// that returns an array of keys. FutureKeyArray is a lightweight object
// that may be efficiently copied, and is safe for concurrent use by multiple goroutines.
type FutureKeyArray interface {
// Get returns an array of keys or an error if the asynchronous operation
// associated with this future did not successfully complete. The current
// goroutine will be blocked until the future is ready.
Get() ([]Key, error)
// MustGet returns an array of keys, or panics if the asynchronous operation
// associated with this future did not successfully complete. The current goroutine
// will be blocked until the future is ready.
MustGet() []Key
}
type futureKeyArray struct {
*future
}
func (f *futureKeyArray) Get() ([]Key, error) {
defer runtime.KeepAlive(f.future)
f.BlockUntilReady()
var ks *C.FDBKey
var count C.int
if err := C.fdb_future_get_key_array(f.ptr, &ks, &count); err != 0 {
return nil, Error{int(err)}
}
ret := make([]Key, int(count))
for i := 0; i < int(count); i++ {
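// FDBKey is declared with 4-byte packing (see fdb_c.h), so on 64-bit platforms each entry
// occupies 12 bytes: an 8-byte key pointer followed by a 4-byte key_length; hence the stride of 12.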
kptr := unsafe.Pointer(uintptr(unsafe.Pointer(ks)) + uintptr(i*12))
ret[i] = stringRefToSlice(kptr)
}
return ret, nil
}
func (f *futureKeyArray) MustGet() []Key {
val, err := f.Get()
if err != nil {
panic(err)
}
return val
}
// FutureInt64 represents the asynchronous result of a function that returns a
// database version. FutureInt64 is a lightweight object that may be efficiently
// copied, and is safe for concurrent use by multiple goroutines.

View File

@ -87,6 +87,8 @@ func (s Snapshot) GetDatabase() Database {
return s.transaction.db
}
// GetEstimatedRangeSizeBytes will get an estimate for the number of bytes
// stored in the given range.
func (s Snapshot) GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64 {
beginKey, endKey := r.FDBRangeKeys()
return s.getEstimatedRangeSizeBytes(
@ -94,3 +96,14 @@ func (s Snapshot) GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64 {
endKey.FDBKey(),
)
}
// GetRangeSplitPoints will return a list of keys that can divide the given range into
// chunks based on the chunk size provided.
func (s Snapshot) GetRangeSplitPoints(r ExactRange, chunkSize int64) FutureKeyArray {
beginKey, endKey := r.FDBRangeKeys()
return s.getRangeSplitPoints(
beginKey.FDBKey(),
endKey.FDBKey(),
chunkSize,
)
}

View File

@ -40,6 +40,7 @@ type ReadTransaction interface {
GetDatabase() Database
Snapshot() Snapshot
GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64
GetRangeSplitPoints(r ExactRange, chunkSize int64) FutureKeyArray
ReadTransactor
}
@ -334,6 +335,30 @@ func (t Transaction) GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64 {
)
}
func (t *transaction) getRangeSplitPoints(beginKey Key, endKey Key, chunkSize int64) FutureKeyArray {
return &futureKeyArray{
future: newFuture(C.fdb_transaction_get_range_split_points(
t.ptr,
byteSliceToPtr(beginKey),
C.int(len(beginKey)),
byteSliceToPtr(endKey),
C.int(len(endKey)),
C.int64_t(chunkSize),
)),
}
}
// GetRangeSplitPoints will return a list of keys that can divide the given range into
// chunks based on the chunk size provided.
func (t Transaction) GetRangeSplitPoints(r ExactRange, chunkSize int64) FutureKeyArray {
beginKey, endKey := r.FDBRangeKeys()
return t.getRangeSplitPoints(
beginKey.FDBKey(),
endKey.FDBKey(),
chunkSize,
)
}
func (t *transaction) getReadVersion() FutureInt64 {
return &futureInt64{
future: newFuture(C.fdb_transaction_get_read_version(t.ptr)),

View File

@ -32,6 +32,7 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/FDBTransaction.java
src/main/com/apple/foundationdb/FutureInt64.java
src/main/com/apple/foundationdb/FutureKey.java
src/main/com/apple/foundationdb/FutureKeyArray.java
src/main/com/apple/foundationdb/FutureResult.java
src/main/com/apple/foundationdb/FutureResults.java
src/main/com/apple/foundationdb/FutureStrings.java
@ -47,6 +48,7 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/package-info.java
src/main/com/apple/foundationdb/Range.java
src/main/com/apple/foundationdb/RangeQuery.java
src/main/com/apple/foundationdb/KeyArrayResult.java
src/main/com/apple/foundationdb/RangeResult.java
src/main/com/apple/foundationdb/RangeResultInfo.java
src/main/com/apple/foundationdb/RangeResultSummary.java

View File

@ -39,6 +39,8 @@ static thread_local bool is_external = false;
static jclass range_result_summary_class;
static jclass range_result_class;
static jclass string_class;
static jclass key_array_result_class;
static jmethodID key_array_result_init;
static jmethodID range_result_init;
static jmethodID range_result_summary_init;
@ -305,6 +307,77 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureStrings_FutureString
return arr;
}
JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureKeyArray_FutureKeyArray_1get(JNIEnv *jenv, jobject, jlong future) {
if( !future ) {
throwParamNotNull(jenv);
return JNI_NULL;
}
FDBFuture *f = (FDBFuture *)future;
const FDBKey *ks;
int count;
fdb_error_t err = fdb_future_get_key_array( f, &ks, &count );
if( err ) {
safeThrow( jenv, getThrowable( jenv, err ) );
return JNI_NULL;
}
int totalKeySize = 0;
for(int i = 0; i < count; i++) {
totalKeySize += ks[i].key_length;
}
jbyteArray keyArray = jenv->NewByteArray(totalKeySize);
if( !keyArray ) {
if( !jenv->ExceptionOccurred() )
throwOutOfMem(jenv);
return JNI_NULL;
}
uint8_t *keys_barr = (uint8_t *)jenv->GetByteArrayElements(keyArray, JNI_NULL);
if (!keys_barr) {
throwRuntimeEx( jenv, "Error getting handle to native resources" );
return JNI_NULL;
}
jintArray lengthArray = jenv->NewIntArray(count);
if( !lengthArray ) {
if( !jenv->ExceptionOccurred() )
throwOutOfMem(jenv);
jenv->ReleaseByteArrayElements(keyArray, (jbyte *)keys_barr, 0);
return JNI_NULL;
}
jint *length_barr = jenv->GetIntArrayElements(lengthArray, JNI_NULL);
if( !length_barr ) {
if( !jenv->ExceptionOccurred() )
throwOutOfMem(jenv);
jenv->ReleaseByteArrayElements(keyArray, (jbyte *)keys_barr, 0);
return JNI_NULL;
}
int offset = 0;
for(int i = 0; i < count; i++) {
memcpy(keys_barr + offset, ks[i].key, ks[i].key_length);
length_barr[i] = ks[i].key_length;
offset += ks[i].key_length;
}
jenv->ReleaseByteArrayElements(keyArray, (jbyte *)keys_barr, 0);
jenv->ReleaseIntArrayElements(lengthArray, length_barr, 0);
jobject result = jenv->NewObject(key_array_result_class, key_array_result_init, keyArray, lengthArray);
if( jenv->ExceptionOccurred() )
return JNI_NULL;
return result;
}
// SOMEDAY: explore doing this more efficiently with Direct ByteBuffers
JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1get(JNIEnv *jenv, jobject, jlong future) {
if( !future ) {
@ -695,6 +768,35 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1
return (jlong)f;
}
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeSplitPoints(JNIEnv *jenv, jobject, jlong tPtr,
jbyteArray beginKeyBytes, jbyteArray endKeyBytes, jlong chunkSize) {
if( !tPtr || !beginKeyBytes || !endKeyBytes) {
throwParamNotNull(jenv);
return 0;
}
FDBTransaction *tr = (FDBTransaction *)tPtr;
uint8_t *startKey = (uint8_t *)jenv->GetByteArrayElements( beginKeyBytes, JNI_NULL );
if(!startKey) {
if( !jenv->ExceptionOccurred() )
throwRuntimeEx( jenv, "Error getting handle to native resources" );
return 0;
}
uint8_t *endKey = (uint8_t *)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
if (!endKey) {
jenv->ReleaseByteArrayElements( beginKeyBytes, (jbyte *)startKey, JNI_ABORT );
if( !jenv->ExceptionOccurred() )
throwRuntimeEx( jenv, "Error getting handle to native resources" );
return 0;
}
FDBFuture *f = fdb_transaction_get_range_split_points( tr, startKey, jenv->GetArrayLength( beginKeyBytes ), endKey, jenv->GetArrayLength( endKeyBytes ), chunkSize );
jenv->ReleaseByteArrayElements( beginKeyBytes, (jbyte *)startKey, JNI_ABORT );
jenv->ReleaseByteArrayElements( endKeyBytes, (jbyte *)endKey, JNI_ABORT );
return (jlong)f;
}
JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1set(JNIEnv *jenv, jobject, jlong tPtr, jbyteArray keyBytes, jbyteArray valueBytes) {
if( !tPtr || !keyBytes || !valueBytes ) {
throwParamNotNull(jenv);
@ -1071,6 +1173,10 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
range_result_init = env->GetMethodID(local_range_result_class, "<init>", "([B[IZ)V");
range_result_class = (jclass) (env)->NewGlobalRef(local_range_result_class);
jclass local_key_array_result_class = env->FindClass("com/apple/foundationdb/KeyArrayResult");
key_array_result_init = env->GetMethodID(local_key_array_result_class, "<init>", "([B[I)V");
key_array_result_class = (jclass) (env)->NewGlobalRef(local_key_array_result_class);
jclass local_range_result_summary_class = env->FindClass("com/apple/foundationdb/RangeResultSummary");
range_result_summary_init = env->GetMethodID(local_range_result_summary_class, "<init>", "([BIZ)V");
range_result_summary_class = (jclass) (env)->NewGlobalRef(local_range_result_summary_class);

View File

@ -80,6 +80,16 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
return FDBTransaction.this.getEstimatedRangeSizeBytes(range);
}
@Override
public CompletableFuture<KeyArrayResult> getRangeSplitPoints(byte[] begin, byte[] end, long chunkSize) {
return FDBTransaction.this.getRangeSplitPoints(begin, end, chunkSize);
}
@Override
public CompletableFuture<KeyArrayResult> getRangeSplitPoints(Range range, long chunkSize) {
return FDBTransaction.this.getRangeSplitPoints(range, chunkSize);
}
///////////////////
// getRange -> KeySelectors
///////////////////
@ -282,6 +292,21 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
return this.getEstimatedRangeSizeBytes(range.begin, range.end);
}
@Override
public CompletableFuture<KeyArrayResult> getRangeSplitPoints(byte[] begin, byte[] end, long chunkSize) {
pointerReadLock.lock();
try {
return new FutureKeyArray(Transaction_getRangeSplitPoints(getPtr(), begin, end, chunkSize), executor);
} finally {
pointerReadLock.unlock();
}
}
@Override
public CompletableFuture<KeyArrayResult> getRangeSplitPoints(Range range, long chunkSize) {
return this.getRangeSplitPoints(range.begin, range.end, chunkSize);
}
///////////////////
// getRange -> KeySelectors
///////////////////
@ -686,4 +711,5 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
private native void Transaction_cancel(long cPtr);
private native long Transaction_getKeyLocations(long cPtr, byte[] key);
private native long Transaction_getEstimatedRangeSizeBytes(long cPtr, byte[] keyBegin, byte[] keyEnd);
private native long Transaction_getRangeSplitPoints(long cPtr, byte[] keyBegin, byte[] keyEnd, long chunkSize);
}

View File

@ -0,0 +1,37 @@
/*
* FutureKeyArray.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.util.concurrent.Executor;
class FutureKeyArray extends NativeFuture<KeyArrayResult> {
FutureKeyArray(long cPtr, Executor executor) {
super(cPtr);
registerMarshalCallback(executor);
}
@Override
protected KeyArrayResult getIfDone_internal(long cPtr) throws FDBException {
return FutureKeyArray_get(cPtr);
}
private native KeyArrayResult FutureKeyArray_get(long cPtr) throws FDBException;
}

View File

@ -0,0 +1,44 @@
/*
* KeyArrayResult.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.util.ArrayList;
import java.util.List;
public class KeyArrayResult {
final List<byte[]> keys;
KeyArrayResult(byte[] keyBytes, int[] keyLengths) {
int count = keyLengths.length;
keys = new ArrayList<byte[]>(count);
int offset = 0;
for(int i = 0; i < count; i++) {
int keyLength = keyLengths[i];
byte[] key = new byte[keyLength];
System.arraycopy(keyBytes, offset, key, 0, keyLength);
offset += keyLength;
keys.add(key);
}
}
}

View File

@ -455,6 +455,26 @@ public interface ReadTransaction extends ReadTransactionContext {
*/
CompletableFuture<Long> getEstimatedRangeSizeBytes(Range range);
/**
* Gets a list of keys that can split the given range into similarly sized chunks based on chunkSize.
*
* @param begin the beginning of the range (inclusive)
* @param end the end of the range (exclusive)
* @param chunkSize the desired size, in bytes, of the chunks between adjacent split points
*
* @return a handle to access the results of the asynchronous call
*/
CompletableFuture<KeyArrayResult> getRangeSplitPoints(byte[] begin, byte[] end, long chunkSize);
/**
* Gets a list of keys that can split the given range into similarly sized chunks based on chunkSize.
*
* @param range the range of the keys
* @param chunkSize the desired size, in bytes, of the chunks between adjacent split points
*
* @return a handle to access the results of the asynchronous call
*/
CompletableFuture<KeyArrayResult> getRangeSplitPoints(Range range, long chunkSize);
/**
* Returns a set of options that can be set on a {@code Transaction}
*

View File

@ -38,6 +38,7 @@ import com.apple.foundationdb.FDB;
import com.apple.foundationdb.FDBException;
import com.apple.foundationdb.KeySelector;
import com.apple.foundationdb.KeyValue;
import com.apple.foundationdb.KeyArrayResult;
import com.apple.foundationdb.MutationType;
import com.apple.foundationdb.Range;
import com.apple.foundationdb.StreamingMode;
@ -229,6 +230,12 @@ public class AsyncStackTester {
inst.push("GOT_ESTIMATED_RANGE_SIZE".getBytes());
}, FDB.DEFAULT_EXECUTOR);
}
else if (op == StackOperation.GET_RANGE_SPLIT_POINTS) {
List<Object> params = inst.popParams(3).join();
return inst.readTr.getRangeSplitPoints((byte[])params.get(0), (byte[])params.get(1), (long)params.get(2)).thenAcceptAsync(splitPoints -> {
inst.push("GOT_RANGE_SPLIT_POINTS".getBytes());
}, FDB.DEFAULT_EXECUTOR);
}
else if(op == StackOperation.GET_RANGE) {
return inst.popParams(5).thenComposeAsync(params -> {
int limit = StackUtils.getInt(params.get(2));

View File

@ -57,6 +57,7 @@ enum StackOperation {
GET_APPROXIMATE_SIZE,
GET_VERSIONSTAMP,
GET_ESTIMATED_RANGE_SIZE,
GET_RANGE_SPLIT_POINTS,
SET_READ_VERSION,
ON_ERROR,
SUB,

View File

@ -39,6 +39,7 @@ import com.apple.foundationdb.FDB;
import com.apple.foundationdb.FDBException;
import com.apple.foundationdb.KeySelector;
import com.apple.foundationdb.KeyValue;
import com.apple.foundationdb.KeyArrayResult;
import com.apple.foundationdb.LocalityUtil;
import com.apple.foundationdb.MutationType;
import com.apple.foundationdb.Range;
@ -211,6 +212,11 @@ public class StackTester {
Long size = inst.readTr.getEstimatedRangeSizeBytes((byte[])params.get(0), (byte[])params.get(1)).join();
inst.push("GOT_ESTIMATED_RANGE_SIZE".getBytes());
}
else if (op == StackOperation.GET_RANGE_SPLIT_POINTS) {
List<Object> params = inst.popParams(3).join();
KeyArrayResult splitPoints = inst.readTr.getRangeSplitPoints((byte[])params.get(0), (byte[])params.get(1), (long)params.get(2)).join();
inst.push("GOT_RANGE_SPLIT_POINTS".getBytes());
}
else if(op == StackOperation.GET_RANGE) {
List<Object> params = inst.popParams(5).join();

View File

@ -462,16 +462,29 @@ class TransactionRead(_FDBBase):
return self.get(key)
def get_estimated_range_size_bytes(self, begin_key, end_key):
if begin_key is None:
begin_key = b''
if end_key is None:
end_key = b'\xff'
if begin_key is None or end_key is None:
if fdb.get_api_version() >= 700:
raise Exception('Invalid begin key or end key')
else:
if begin_key is None:
begin_key = b''
if end_key is None:
end_key = b'\xff'
return FutureInt64(self.capi.fdb_transaction_get_estimated_range_size_bytes(
self.tpointer,
begin_key, len(begin_key),
end_key, len(end_key)
))
def get_range_split_points(self, begin_key, end_key, chunk_size):
if begin_key is None or end_key is None or chunk_size <= 0:
raise Exception('Invalid begin key, end key or chunk size')
return FutureKeyArray(self.capi.fdb_transaction_get_range_split_points(
self.tpointer,
begin_key, len(begin_key),
end_key, len(end_key),
chunk_size
))
class Transaction(TransactionRead):
"""A modifiable snapshot of a Database.
@ -736,6 +749,14 @@ class FutureKeyValueArray(Future):
# the KVs on the python side and in most cases we are about to
# destroy the future anyway
class FutureKeyArray(Future):
def wait(self):
self.block_until_ready()
ks = ctypes.pointer(KeyStruct())
count = ctypes.c_int()
self.capi.fdb_future_get_key_array(self.fpointer, ctypes.byref(ks), ctypes.byref(count))
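# The C API returns a pointer to a contiguous array of KeyStruct entries; slicing the ctypes
# pointer yields `count` structs, each copied out below as an immutable bytes value.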
return [ctypes.string_at(x.key, x.key_length) for x in ks[0:count.value]]
class FutureStringArray(Future):
def wait(self):
@ -1217,6 +1238,11 @@ class KeyValueStruct(ctypes.Structure):
('value_length', ctypes.c_int)]
_pack_ = 4
class KeyStruct(ctypes.Structure):
_fields_ = [('key', ctypes.POINTER(ctypes.c_byte)),
('key_length', ctypes.c_int)]
_pack_ = 4
class KeyValue(object):
def __init__(self, key, value):
@ -1406,6 +1432,11 @@ def init_c_api():
_capi.fdb_future_get_keyvalue_array.restype = int
_capi.fdb_future_get_keyvalue_array.errcheck = check_error_code
_capi.fdb_future_get_key_array.argtypes = [ctypes.c_void_p, ctypes.POINTER(
ctypes.POINTER(KeyStruct)), ctypes.POINTER(ctypes.c_int)]
_capi.fdb_future_get_key_array.restype = int
_capi.fdb_future_get_key_array.errcheck = check_error_code
_capi.fdb_future_get_string_array.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p)), ctypes.POINTER(ctypes.c_int)]
_capi.fdb_future_get_string_array.restype = int
_capi.fdb_future_get_string_array.errcheck = check_error_code
@ -1451,6 +1482,9 @@ def init_c_api():
_capi.fdb_transaction_get_estimated_range_size_bytes.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int]
_capi.fdb_transaction_get_estimated_range_size_bytes.restype = ctypes.c_void_p
_capi.fdb_transaction_get_range_split_points.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int]
_capi.fdb_transaction_get_range_split_points.restype = ctypes.c_void_p
_capi.fdb_transaction_add_conflict_range.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_int]
_capi.fdb_transaction_add_conflict_range.restype = ctypes.c_int
_capi.fdb_transaction_add_conflict_range.errcheck = check_error_code

View File

@ -393,6 +393,10 @@ class Tester:
begin, end = inst.pop(2)
estimatedSize = obj.get_estimated_range_size_bytes(begin, end).wait()
inst.push(b"GOT_ESTIMATED_RANGE_SIZE")
elif inst.op == six.u("GET_RANGE_SPLIT_POINTS"):
begin, end, chunkSize = inst.pop(3)
splitPoints = obj.get_range_split_points(begin, end, chunkSize).wait()
inst.push(b"GOT_RANGE_SPLIT_POINTS")
elif inst.op == six.u("GET_KEY"):
key, or_equal, offset, prefix = inst.pop(4)
result = obj.get_key(fdb.KeySelector(key, or_equal, offset))

View File

@ -109,6 +109,7 @@ module FDB
attach_function :fdb_transaction_get_key, [ :pointer, :pointer, :int, :int, :int, :int ], :pointer
attach_function :fdb_transaction_get_range, [ :pointer, :pointer, :int, :int, :int, :pointer, :int, :int, :int, :int, :int, :int, :int, :int, :int ], :pointer
attach_function :fdb_transaction_get_estimated_range_size_bytes, [ :pointer, :pointer, :int, :pointer, :int ], :pointer
attach_function :fdb_transaction_get_range_split_points, [ :pointer, :pointer, :int, :pointer, :int, :int64 ], :pointer
attach_function :fdb_transaction_set, [ :pointer, :pointer, :int, :pointer, :int ], :void
attach_function :fdb_transaction_clear, [ :pointer, :pointer, :int ], :void
attach_function :fdb_transaction_clear_range, [ :pointer, :pointer, :int, :pointer, :int ], :void
@ -129,6 +130,12 @@ module FDB
:value_length, :int
end
class KeyStruct < FFI::Struct
pack 4
layout :key, :pointer,
:key_length, :int
end
def self.check_error(code)
raise Error.new(code) if code.nonzero?
nil
@ -472,6 +479,22 @@ module FDB
end
end
class FutureKeyArray < Future
def wait
block_until_ready
ks = FFI::MemoryPointer.new :pointer
count = FFI::MemoryPointer.new :int
FDBC.check_error FDBC.fdb_future_get_key_array(@fpointer, ks, count)
ks = ks.read_pointer
(0..count.read_int-1).map{|i|
x = FDBC::KeyStruct.new(ks + (i * FDBC::KeyStruct.size))
x[:key].read_bytes(x[:key_length])
}
end
end
class FutureStringArray < LazyFuture
def getter
strings = FFI::MemoryPointer.new :pointer
@ -825,6 +848,15 @@ module FDB
Int64Future.new(FDBC.fdb_transaction_get_estimated_range_size_bytes(@tpointer, bkey, bkey.bytesize, ekey, ekey.bytesize))
end
def get_range_split_points(begin_key, end_key, chunk_size)
if chunk_size <= 0
raise ArgumentError, "Invalid chunk size"
end
bkey = FDB.key_to_bytes(begin_key)
ekey = FDB.key_to_bytes(end_key)
FutureKeyArray.new(FDBC.fdb_transaction_get_range_split_points(@tpointer, bkey, bkey.bytesize, ekey, ekey.bytesize, chunk_size))
end
end
TransactionRead.class_variable_set("@@StreamingMode", @@StreamingMode)

View File

@ -320,6 +320,9 @@ class Tester
when "GET_ESTIMATED_RANGE_SIZE"
inst.tr.get_estimated_range_size_bytes(inst.wait_and_pop, inst.wait_and_pop).to_i
inst.push("GOT_ESTIMATED_RANGE_SIZE")
when "GET_RANGE_SPLIT_POINTS"
inst.tr.get_range_split_points(inst.wait_and_pop, inst.wait_and_pop, inst.wait_and_pop).wait.length
inst.push("GOT_RANGE_SPLIT_POINTS")
when "GET_KEY"
selector = FDB::KeySelector.new(inst.wait_and_pop, inst.wait_and_pop, inst.wait_and_pop)
prefix = inst.wait_and_pop

View File

@ -9,7 +9,7 @@ Release Notes
Features
--------
* Added a new API in all bindings that can be used to get a list of split points that will split the given range into (roughly) equally sized chunks. `(PR #3394) <https://github.com/apple/foundationdb/pull/3394>`_
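For illustration, a minimal sketch of the new call through the Python binding (an example only, not part of this change; it assumes API version 700 is selected and a cluster is reachable via the default cluster file):

import fdb
fdb.api_version(700)

db = fdb.open()

@fdb.transactional
def get_split_points(tr, begin, end, chunk_size):
    # The returned list has the form [begin, split_1, ..., split_n, end];
    # adjacent entries bound chunks of roughly chunk_size bytes.
    return tr.get_range_split_points(begin, end, chunk_size).wait()

points = get_split_points(db, b'a', b'z', 10 * 1000 * 1000)  # ~10MB chunks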
Performance
@ -34,7 +34,7 @@ Status
Bindings
--------
* Python: The method ``get_estimated_range_size_bytes`` will now throw an error if the ``begin_key`` or ``end_key`` is ``None``. `(PR #3394) <https://github.com/apple/foundationdb/pull/3394>`_
Other Changes

View File

@ -49,6 +49,8 @@ public:
virtual void addReadConflictRange(const KeyRangeRef& keys) = 0;
virtual ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) = 0;
virtual ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) = 0;
virtual void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) = 0;
virtual void set(const KeyRef& key, const ValueRef& value) = 0;

View File

@ -159,6 +159,23 @@ ThreadFuture<int64_t> DLTransaction::getEstimatedRangeSizeBytes(const KeyRangeRe
});
}
ThreadFuture<Standalone<VectorRef<KeyRef>>> DLTransaction::getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) {
if (!api->transactionGetRangeSplitPoints) {
return unsupported_operation();
}
FdbCApi::FDBFuture* f = api->transactionGetRangeSplitPoints(tr, range.begin.begin(), range.begin.size(),
range.end.begin(), range.end.size(), chunkSize);
return toThreadFuture<Standalone<VectorRef<KeyRef>>>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
const FdbCApi::FDBKey* splitKeys;
int keysArrayLength;
FdbCApi::fdb_error_t error = api->futureGetKeyArray(f, &splitKeys, &keysArrayLength);
ASSERT(!error);
return Standalone<VectorRef<KeyRef>>(VectorRef<KeyRef>((KeyRef*)splitKeys, keysArrayLength), Arena());
});
}
void DLTransaction::addReadConflictRange(const KeyRangeRef& keys) {
throwIfError(api->transactionAddConflictRange(tr, keys.begin.begin(), keys.begin.size(), keys.end.begin(), keys.end.size(), FDBConflictRangeTypes::READ));
}
@ -322,12 +339,15 @@ void DLApi::init() {
loadClientFunction(&api->transactionCancel, lib, fdbCPath, "fdb_transaction_cancel");
loadClientFunction(&api->transactionAddConflictRange, lib, fdbCPath, "fdb_transaction_add_conflict_range");
loadClientFunction(&api->transactionGetEstimatedRangeSizeBytes, lib, fdbCPath, "fdb_transaction_get_estimated_range_size_bytes", headerVersion >= 630);
loadClientFunction(&api->transactionGetRangeSplitPoints, lib, fdbCPath, "fdb_transaction_get_range_split_points",
headerVersion >= 700);
loadClientFunction(&api->futureGetInt64, lib, fdbCPath, headerVersion >= 620 ? "fdb_future_get_int64" : "fdb_future_get_version");
loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error");
loadClientFunction(&api->futureGetKey, lib, fdbCPath, "fdb_future_get_key");
loadClientFunction(&api->futureGetValue, lib, fdbCPath, "fdb_future_get_value");
loadClientFunction(&api->futureGetStringArray, lib, fdbCPath, "fdb_future_get_string_array");
loadClientFunction(&api->futureGetKeyArray, lib, fdbCPath, "fdb_future_get_key_array", headerVersion >= 700);
loadClientFunction(&api->futureGetKeyValueArray, lib, fdbCPath, "fdb_future_get_keyvalue_array");
loadClientFunction(&api->futureSetCallback, lib, fdbCPath, "fdb_future_set_callback");
loadClientFunction(&api->futureCancel, lib, fdbCPath, "fdb_future_cancel");
@ -568,6 +588,14 @@ ThreadFuture<int64_t> MultiVersionTransaction::getEstimatedRangeSizeBytes(const
return abortableFuture(f, tr.onChange);
}
ThreadFuture<Standalone<VectorRef<KeyRef>>> MultiVersionTransaction::getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) {
auto tr = getTransaction();
auto f = tr.transaction ? tr.transaction->getRangeSplitPoints(range, chunkSize)
: ThreadFuture<Standalone<VectorRef<KeyRef>>>(Never());
return abortableFuture(f, tr.onChange);
}
void MultiVersionTransaction::atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) {
auto tr = getTransaction();
if(tr.transaction) {

View File

@ -35,6 +35,10 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
typedef struct transaction FDBTransaction;
#pragma pack(push, 4)
typedef struct key {
const uint8_t* key;
int keyLength;
} FDBKey;
typedef struct keyvalue {
const void *key;
int keyLength;
@ -84,7 +88,11 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
FDBFuture* (*transactionGetEstimatedRangeSizeBytes)(FDBTransaction* tr, uint8_t const* begin_key_name,
int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length);
FDBFuture* (*transactionGetRangeSplitPoints)(FDBTransaction* tr, uint8_t const* begin_key_name,
int begin_key_name_length, uint8_t const* end_key_name,
int end_key_name_length, int64_t chunkSize);
FDBFuture* (*transactionCommit)(FDBTransaction *tr);
fdb_error_t (*transactionGetCommittedVersion)(FDBTransaction *tr, int64_t *outVersion);
FDBFuture* (*transactionGetApproximateSize)(FDBTransaction *tr);
@ -103,6 +111,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
fdb_error_t (*futureGetKey)(FDBFuture *f, uint8_t const **outKey, int *outKeyLength);
fdb_error_t (*futureGetValue)(FDBFuture *f, fdb_bool_t *outPresent, uint8_t const **outValue, int *outValueLength);
fdb_error_t (*futureGetStringArray)(FDBFuture *f, const char ***outStrings, int *outCount);
fdb_error_t (*futureGetKeyArray)(FDBFuture* f, FDBKey const** outKeys, int* outCount);
fdb_error_t (*futureGetKeyValueArray)(FDBFuture *f, FDBKeyValue const ** outKV, int *outCount, fdb_bool_t *outMore);
fdb_error_t (*futureSetCallback)(FDBFuture *f, FDBCallback callback, void *callback_parameter);
void (*futureCancel)(FDBFuture *f);
@ -133,7 +142,9 @@ public:
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
ThreadFuture<Standalone<StringRef>> getVersionstamp() override;
ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override;
ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) override;
void addReadConflictRange(const KeyRangeRef& keys) override;
void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) override;
@ -237,6 +248,8 @@ public:
void addReadConflictRange(const KeyRangeRef& keys) override;
ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override;
ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) override;
void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) override;
void set(const KeyRef& key, const ValueRef& value) override;

View File

@ -1822,6 +1822,7 @@ ACTOR Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocatio
}
}
// Returns a vector of <ShardRange, storage server location info> pairs.
template <class F>
Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLocations( Database const& cx, KeyRange const& keys, int limit, bool reverse, F StorageServerInterface::*member, TransactionInfo const& info ) {
ASSERT (!keys.empty());
@ -4346,8 +4347,11 @@ ACTOR Future<Standalone<VectorRef<ReadHotRangeWithMetrics>>> getReadHotRanges(Da
// .detail("KeysEnd", keys.end.printable().c_str());
// }
state vector<Future<ReadHotSubRangeReply>> fReplies(nLocs);
KeyRef partBegin, partEnd;
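// Clamp the first and last shard sub-ranges to the requested range's begin and end keys.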
for (int i = 0; i < nLocs; i++) {
ReadHotSubRangeRequest req(locations[i].first);
partBegin = (i == 0) ? keys.begin : locations[i].first.begin;
partEnd = (i == nLocs - 1) ? keys.end : locations[i].first.end;
ReadHotSubRangeRequest req(KeyRangeRef(partBegin, partEnd));
fReplies[i] = loadBalance(locations[i].second->locations(), &StorageServerInterface::getReadHotRanges, req,
TaskPriority::DataDistribution);
}
@ -4475,6 +4479,58 @@ Future<Standalone<VectorRef<ReadHotRangeWithMetrics>>> Transaction::getReadHotRa
return ::getReadHotRanges(cx, keys);
}
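// Resolves the range into per-shard locations, asks each storage server for split points within its
// shard, then stitches the replies together: interior shard boundaries are kept as split points and
// the result is bounded by keys.begin and keys.end. On wrong_shard_server or all_alternatives_failed
// the location cache is invalidated and the whole range is retried.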
ACTOR Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(Database cx, KeyRange keys, int64_t chunkSize) {
state Span span("NAPI:GetRangeSplitPoints"_loc);
loop {
state vector<pair<KeyRange, Reference<LocationInfo>>> locations =
wait(getKeyRangeLocations(cx, keys, 100, false, &StorageServerInterface::getRangeSplitPoints,
TransactionInfo(TaskPriority::DataDistribution, span.context)));
try {
state int nLocs = locations.size();
state vector<Future<SplitRangeReply>> fReplies(nLocs);
KeyRef partBegin, partEnd;
for (int i = 0; i < nLocs; i++) {
partBegin = (i == 0) ? keys.begin : locations[i].first.begin;
partEnd = (i == nLocs - 1) ? keys.end : locations[i].first.end;
SplitRangeRequest req(KeyRangeRef(partBegin, partEnd), chunkSize);
fReplies[i] = loadBalance(locations[i].second->locations(), &StorageServerInterface::getRangeSplitPoints, req,
TaskPriority::DataDistribution);
}
wait(waitForAll(fReplies));
Standalone<VectorRef<KeyRef>> results;
results.push_back_deep(results.arena(), keys.begin);
for (int i = 0; i < nLocs; i++) {
if (i > 0) {
results.push_back_deep(results.arena(), locations[i].first.begin); // Need this shard boundary
}
if (fReplies[i].get().splitPoints.size() > 0) {
results.append(results.arena(), fReplies[i].get().splitPoints.begin(),
fReplies[i].get().splitPoints.size());
results.arena().dependsOn(fReplies[i].get().splitPoints.arena());
}
}
if (results.back() != keys.end) {
results.push_back_deep(results.arena(), keys.end);
}
return results;
} catch (Error& e) {
if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
TraceEvent(SevError, "GetRangeSplitPoints").error(e);
throw;
}
cx->invalidateCache(keys);
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
}
}
}
Future<Standalone<VectorRef<KeyRef>>> Transaction::getRangeSplitPoints(KeyRange const& keys, int64_t chunkSize) {
return ::getRangeSplitPoints(cx, keys, chunkSize);
}
ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx, KeyRange keys, StorageMetrics limit, StorageMetrics estimated )
{
state Span span("NAPI:SplitStorageMetrics"_loc);

View File

@ -268,6 +268,9 @@ public:
Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( KeyRange const& keys, StorageMetrics const& limit, StorageMetrics const& estimated );
Future<Standalone<VectorRef<ReadHotRangeWithMetrics>>> getReadHotRanges(KeyRange const& keys);
// Try to split the given range into roughly equally sized chunks based on the estimated size.
// The returned list is of the form [keys.begin, splitPoint1, splitPoint2, ... , keys.end].
Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(KeyRange const& keys, int64_t chunkSize);
// If checkWriteConflictRanges is true, existing write conflict ranges will be searched for this key
void set( const KeyRef& key, const ValueRef& value, bool addConflictRange = true );
void atomicOp( const KeyRef& key, const ValueRef& value, MutationRef::Type operationType, bool addConflictRange = true );

View File

@ -1401,6 +1401,20 @@ Future<int64_t> ReadYourWritesTransaction::getEstimatedRangeSizeBytes(const KeyR
return map(waitOrError(tr.getStorageMetrics(keys, -1), resetPromise.getFuture()), [](const StorageMetrics& m) { return m.bytes; });
}
Future<Standalone<VectorRef<KeyRef>>> ReadYourWritesTransaction::getRangeSplitPoints(const KeyRange& range,
int64_t chunkSize) {
if (checkUsedDuringCommit()) {
return used_during_commit();
}
if (resetPromise.isSet()) return resetPromise.getFuture().getError();
KeyRef maxKey = getMaxReadKey();
if(range.begin > maxKey || range.end > maxKey)
return key_outside_legal_range();
return waitOrError(tr.getRangeSplitPoints(range, chunkSize), resetPromise.getFuture());
}
void ReadYourWritesTransaction::addReadConflictRange( KeyRangeRef const& keys ) {
if(checkUsedDuringCommit()) {
throw used_during_commit();

View File

@ -86,6 +86,7 @@ public:
}
[[nodiscard]] Future<Standalone<VectorRef<const char*>>> getAddressesForKey(const Key& key);
Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize);
Future<int64_t> getEstimatedRangeSizeBytes(const KeyRange& keys);
void addReadConflictRange( KeyRangeRef const& keys );

View File

@ -72,6 +72,8 @@ struct StorageServerInterface {
RequestStream<ReplyPromise<KeyValueStoreType>> getKeyValueStoreType;
RequestStream<struct WatchValueRequest> watchValue;
RequestStream<struct ReadHotSubRangeRequest> getReadHotRanges;
RequestStream<struct SplitRangeRequest> getRangeSplitPoints;
explicit StorageServerInterface(UID uid) : uniqueID( uid ) {}
StorageServerInterface() : uniqueID( deterministicRandom()->randomUniqueID() ) {}
NetworkAddress address() const { return getValue.getEndpoint().getPrimaryAddress(); }
@ -98,6 +100,7 @@ struct StorageServerInterface {
getKeyValueStoreType = RequestStream<ReplyPromise<KeyValueStoreType>>( getValue.getEndpoint().getAdjustedEndpoint(9) );
watchValue = RequestStream<struct WatchValueRequest>( getValue.getEndpoint().getAdjustedEndpoint(10) );
getReadHotRanges = RequestStream<struct ReadHotSubRangeRequest>( getValue.getEndpoint().getAdjustedEndpoint(11) );
getRangeSplitPoints = RequestStream<struct SplitRangeRequest>(getValue.getEndpoint().getAdjustedEndpoint(12));
}
} else {
ASSERT(Ar::isDeserializing);
@ -125,6 +128,7 @@ struct StorageServerInterface {
streams.push_back(getKeyValueStoreType.getReceiver());
streams.push_back(watchValue.getReceiver());
streams.push_back(getReadHotRanges.getReceiver());
streams.push_back(getRangeSplitPoints.getReceiver());
FlowTransport::transport().addEndpoints(streams);
}
};
@ -479,6 +483,34 @@ struct ReadHotSubRangeRequest {
}
};
struct SplitRangeReply {
constexpr static FileIdentifier file_identifier = 11813134;
// If the given range can be divided, contains the split points.
// If the given range cannot be divided (for example, its total size is smaller than the chunk size),
// this will be empty.
Standalone<VectorRef<KeyRef>> splitPoints;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, splitPoints);
}
};
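// Request sent to a storage server asking for split points within `keys`; chunkSize is the desired
// number of bytes between adjacent split points.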
struct SplitRangeRequest {
constexpr static FileIdentifier file_identifier = 10725174;
Arena arena;
KeyRangeRef keys;
int64_t chunkSize;
ReplyPromise<SplitRangeReply> reply;
SplitRangeRequest() {}
SplitRangeRequest(KeyRangeRef const& keys, int64_t chunkSize) : keys(arena, keys), chunkSize(chunkSize) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, keys, chunkSize, reply, arena);
}
};
struct GetStorageMetricsReply {
constexpr static FileIdentifier file_identifier = 15491478;
StorageMetrics load;

View File

@ -164,6 +164,16 @@ ThreadFuture<int64_t> ThreadSafeTransaction::getEstimatedRangeSizeBytes( const K
} );
}
ThreadFuture<Standalone<VectorRef<KeyRef>>> ThreadSafeTransaction::getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) {
KeyRange r = range;
ReadYourWritesTransaction* tr = this->tr;
return onMainThread([tr, r, chunkSize]() -> Future<Standalone<VectorRef<KeyRef>>> {
tr->checkDeferredError();
return tr->getRangeSplitPoints(r, chunkSize);
});
}
ThreadFuture< Standalone<RangeResultRef> > ThreadSafeTransaction::getRange( const KeySelectorRef& begin, const KeySelectorRef& end, int limit, bool snapshot, bool reverse ) {
KeySelector b = begin;

View File

@ -72,6 +72,8 @@ public:
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
ThreadFuture<Standalone<StringRef>> getVersionstamp() override;
ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override;
ThreadFuture<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRangeRef& range,
int64_t chunkSize) override;
void addReadConflictRange( const KeyRangeRef& keys ) override;
void makeSelfConflicting();

View File

@ -483,6 +483,35 @@ struct StorageServerMetrics {
req.reply.send(reply);
}
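// Walks the byte sample for the given range and returns the keys at which roughly chunkSize sampled
// bytes have accumulated since the previous point, restarting the accumulation after each emitted
// key. A candidate equal to the previous point is advanced past, and the walk stops once a candidate
// falls beyond range.end, so the result may be empty for small ranges.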
std::vector<KeyRef> getSplitPoints(KeyRangeRef range, int64_t chunkSize) {
std::vector<KeyRef> toReturn;
KeyRef beginKey = range.begin;
IndexedSet<Key, int64_t>::iterator endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + chunkSize);
while (endKey != byteSample.sample.end()) {
if (*endKey > range.end) {
break;
}
if (*endKey == beginKey) {
++endKey;
continue;
}
toReturn.push_back(*endKey);
beginKey = *endKey;
endKey =
byteSample.sample.index(byteSample.sample.sumTo(byteSample.sample.lower_bound(beginKey)) + chunkSize);
}
return toReturn;
}
void getSplitPoints(SplitRangeRequest req) {
SplitRangeReply reply;
std::vector<KeyRef> points = getSplitPoints(req.keys, req.chunkSize);
reply.splitPoints.append_deep(reply.splitPoints.arena(), points.data(), points.size());
req.reply.send(reply);
}
private:
static void collapse( KeyRangeMap<int>& map, KeyRef const& key ) {
auto range = map.rangeContaining(key);
@ -503,6 +532,92 @@ private:
}
};
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/simple") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
vector<KeyRef> t = ssm.getSplitPoints(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 2000 * sampleUnit);
ASSERT(t.size() == 1 && t[0] == LiteralStringRef("Bah"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/multipleReturnedPoints") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
vector<KeyRef> t = ssm.getSplitPoints(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 600 * sampleUnit);
ASSERT(t.size() == 3 && t[0] == LiteralStringRef("Absolute") && t[1] == LiteralStringRef("Apple") &&
t[2] == LiteralStringRef("Bah"));
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/noneSplitable") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 800 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 1000 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 200 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 100 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 300 * sampleUnit);
vector<KeyRef> t = ssm.getSplitPoints(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 10000 * sampleUnit);
ASSERT(t.size() == 0);
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/rangeSplitPoints/chunkTooLarge") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;
StorageServerMetrics ssm;
ssm.byteSample.sample.insert(LiteralStringRef("A"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Absolute"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Apple"), 10 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bah"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Banana"), 80 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Bob"), 20 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("But"), 10 * sampleUnit);
ssm.byteSample.sample.insert(LiteralStringRef("Cat"), 30 * sampleUnit);
vector<KeyRef> t = ssm.getSplitPoints(KeyRangeRef(LiteralStringRef("A"), LiteralStringRef("C")), 1000 * sampleUnit);
ASSERT(t.size() == 0);
return Void();
}
TEST_CASE("/fdbserver/StorageMetricSample/readHotDetect/simple") {
int64_t sampleUnit = SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE;

View File

@ -3738,6 +3738,14 @@ ACTOR Future<Void> metricsCore( StorageServer* self, StorageServerInterface ssi
self->metrics.getReadHotRanges(req);
}
}
when(SplitRangeRequest req = waitNext(ssi.getRangeSplitPoints.getFuture())) {
if (!self->isReadable(req.keys)) {
TEST(true); // getSplitPoints immediate wrong_shard_server()
self->sendErrorWithPenalty(req.reply, wrong_shard_server(), self->getPenalty());
} else {
self->metrics.getSplitPoints(req);
}
}
when (wait(doPollMetrics) ) {
self->metrics.poll();
doPollMetrics = delay(SERVER_KNOBS->STORAGE_SERVER_POLL_METRICS_DELAY);

View File

@ -706,6 +706,7 @@ ACTOR Future<Void> storageServerRollbackRebooter( Future<Void> prevStorageServer
DUMPTOKEN(recruited.waitMetrics);
DUMPTOKEN(recruited.splitMetrics);
DUMPTOKEN(recruited.getReadHotRanges);
DUMPTOKEN(recruited.getRangeSplitPoints);
DUMPTOKEN(recruited.getStorageMetrics);
DUMPTOKEN(recruited.waitFailure);
DUMPTOKEN(recruited.getQueuingMetrics);
@ -1036,6 +1037,7 @@ ACTOR Future<Void> workerServer(
DUMPTOKEN(recruited.waitMetrics);
DUMPTOKEN(recruited.splitMetrics);
DUMPTOKEN(recruited.getReadHotRanges);
DUMPTOKEN(recruited.getRangeSplitPoints);
DUMPTOKEN(recruited.getStorageMetrics);
DUMPTOKEN(recruited.waitFailure);
DUMPTOKEN(recruited.getQueuingMetrics);
@ -1347,6 +1349,7 @@ ACTOR Future<Void> workerServer(
DUMPTOKEN(recruited.waitMetrics);
DUMPTOKEN(recruited.splitMetrics);
DUMPTOKEN(recruited.getReadHotRanges);
DUMPTOKEN(recruited.getRangeSplitPoints);
DUMPTOKEN(recruited.getStorageMetrics);
DUMPTOKEN(recruited.waitFailure);
DUMPTOKEN(recruited.getQueuingMetrics);