Merged from upstream master
This commit is contained in:
commit
f211a54593
|
@ -536,3 +536,51 @@ sse2neon Authors (sse2neon)
|
|||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
|
||||
rte_memcpy.h (from DPDK):
|
||||
SPDX-License-Identifier: BSD-3-Clause
|
||||
Copyright(c) 2010-2014 Intel Corporation
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
folly_memcpy:
|
||||
|
||||
Copyright (c) Facebook, Inc. and its affiliates.
|
||||
Author: Bin Liu <binliu@fb.com>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
|
@ -158,15 +158,15 @@ namespace FDB {
|
|||
void reset() override;
|
||||
|
||||
TransactionImpl() : tr(NULL) {}
|
||||
TransactionImpl(TransactionImpl&& r) BOOST_NOEXCEPT {
|
||||
tr = r.tr;
|
||||
r.tr = NULL;
|
||||
}
|
||||
TransactionImpl& operator=(TransactionImpl&& r) BOOST_NOEXCEPT {
|
||||
tr = r.tr;
|
||||
r.tr = NULL;
|
||||
TransactionImpl(TransactionImpl&& r) noexcept {
|
||||
tr = r.tr;
|
||||
r.tr = NULL;
|
||||
}
|
||||
TransactionImpl& operator=(TransactionImpl&& r) noexcept {
|
||||
tr = r.tr;
|
||||
r.tr = NULL;
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
FDBTransaction* tr;
|
||||
|
|
|
@ -35,6 +35,7 @@ import com.apple.foundationdb.Transaction;
|
|||
*
|
||||
*/
|
||||
public class ByteArrayUtil extends FastByteComparisons {
|
||||
private static final byte[] EMPTY_BYTES = new byte[0];
|
||||
|
||||
/**
|
||||
* Joins a set of byte arrays into a larger array. The {@code interlude} is placed
|
||||
|
@ -45,36 +46,46 @@ public class ByteArrayUtil extends FastByteComparisons {
|
|||
* concatenated elements.
|
||||
* @param parts the pieces to be joined. May be {@code null}, but does not allow
|
||||
* for elements in the list to be {@code null}.
|
||||
*
|
||||
*
|
||||
* @return a newly created concatenation of the input
|
||||
*/
|
||||
public static byte[] join(byte[] interlude, List<byte[]> parts) {
|
||||
return interludeJoin(interlude, parts.toArray(new byte[0][]));
|
||||
}
|
||||
/**
|
||||
* Joins a set of byte arrays into a larger array. The {@code interlude} is placed
|
||||
* between each of the elements, but not at the beginning or end. In the case that
|
||||
* the list is empty or {@code null}, a zero-length byte array will be returned.
|
||||
*
|
||||
* @param interlude can be {@code null} or zero length. Placed internally between
|
||||
* concatenated elements.
|
||||
* @param parts the pieces to be joined. May be {@code null}, but does not allow
|
||||
* for elements in the array to be {@code null}.
|
||||
*
|
||||
* @return a newly created concatenation of the input
|
||||
*/
|
||||
public static byte[] interludeJoin(byte[] interlude, byte[][] parts) {
|
||||
if(parts == null)
|
||||
return new byte[0];
|
||||
int partCount = parts.size();
|
||||
int partCount = parts.length;
|
||||
if(partCount == 0)
|
||||
return new byte[0];
|
||||
return EMPTY_BYTES;
|
||||
|
||||
if(interlude == null)
|
||||
interlude = new byte[0];
|
||||
interlude = EMPTY_BYTES;
|
||||
|
||||
int elementTotals = 0;
|
||||
int interludeSize = interlude.length;
|
||||
for(byte[] e : parts) {
|
||||
elementTotals += e.length;
|
||||
for (int i = 0; i < partCount; i++) {
|
||||
elementTotals += parts[i].length;
|
||||
}
|
||||
|
||||
byte[] dest = new byte[(interludeSize * (partCount - 1)) + elementTotals];
|
||||
|
||||
//System.out.println(" interlude -> " + ArrayUtils.printable(interlude));
|
||||
|
||||
int startByte = 0;
|
||||
int index = 0;
|
||||
for(byte[] part : parts) {
|
||||
//System.out.println(" section -> " + ArrayUtils.printable(parts.get(i)));
|
||||
int length = part.length;
|
||||
for (int i = 0; i < partCount; i++) {
|
||||
int length = parts[i].length;
|
||||
if(length > 0) {
|
||||
System.arraycopy(part, 0, dest, startByte, length);
|
||||
System.arraycopy(parts[i], 0, dest, startByte, length);
|
||||
startByte += length;
|
||||
}
|
||||
if(index < partCount - 1 && interludeSize > 0) {
|
||||
|
@ -84,8 +95,6 @@ public class ByteArrayUtil extends FastByteComparisons {
|
|||
}
|
||||
index++;
|
||||
}
|
||||
|
||||
//System.out.println(" complete -> " + ArrayUtils.printable(dest));
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
@ -97,7 +106,7 @@ public class ByteArrayUtil extends FastByteComparisons {
|
|||
* @return a newly created concatenation of the input
|
||||
*/
|
||||
public static byte[] join(byte[]... parts) {
|
||||
return join(null, Arrays.asList(parts));
|
||||
return interludeJoin(null, parts);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -761,10 +761,6 @@ public class AsyncStackTester {
|
|||
c.run();
|
||||
//System.out.println("Done with test.");
|
||||
|
||||
/*byte[] key = Tuple.from("test_results".getBytes(), 5).pack();
|
||||
byte[] bs = db.createTransaction().get(key).get();
|
||||
System.out.println("output of " + ByteArrayUtil.printable(key) + " as: " + ByteArrayUtil.printable(bs));*/
|
||||
|
||||
db.close();
|
||||
System.gc();
|
||||
|
||||
|
|
|
@ -45,17 +45,16 @@ public class LocalityTests {
|
|||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
CloseableAsyncIterator<byte[]> keys = LocalityUtil.getBoundaryKeys(database, new byte[0], new byte[]{(byte) 255});
|
||||
CompletableFuture<List<byte[]>> collection = AsyncUtil.collectRemaining(keys);
|
||||
List<byte[]> list = collection.join();
|
||||
System.out.println("Took " + (System.currentTimeMillis() - start) + "ms to get " +
|
||||
list.size() + " items");
|
||||
try(CloseableAsyncIterator<byte[]> keys = LocalityUtil.getBoundaryKeys(database, new byte[0], new byte[]{(byte) 255})) {
|
||||
CompletableFuture<List<byte[]>> collection = AsyncUtil.collectRemaining(keys);
|
||||
List<byte[]> list = collection.join();
|
||||
System.out.println("Took " + (System.currentTimeMillis() - start) + "ms to get " +
|
||||
list.size() + " items");
|
||||
|
||||
keys.close();
|
||||
|
||||
int i = 0;
|
||||
for(byte[] key : collection.join()) {
|
||||
System.out.println(i++ + ": " + ByteArrayUtil.printable(key));
|
||||
int i = 0;
|
||||
for(byte[] key : collection.join()) {
|
||||
System.out.println(i++ + ": " + ByteArrayUtil.printable(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,7 +64,9 @@ public class RangeTest {
|
|||
|
||||
System.out.println("First transaction was successful");
|
||||
|
||||
checkRange(db.createTransaction());
|
||||
try(Transaction tr = db.createTransaction()) {
|
||||
checkRange(tr);
|
||||
}
|
||||
|
||||
long version;
|
||||
try(Transaction tr = db.createTransaction()) {
|
||||
|
@ -184,7 +186,6 @@ public class RangeTest {
|
|||
String value = new String(kv.getValue());
|
||||
System.out.println(" -- " + key + " -> " + value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private RangeTest() {}
|
||||
|
|
|
@ -88,6 +88,7 @@ public class SerialInsertion {
|
|||
tr.set(buf.array(), value);
|
||||
}
|
||||
tr.commit().join();
|
||||
tr.close();
|
||||
tr = db.createTransaction();
|
||||
done += i;
|
||||
}
|
||||
|
|
|
@ -649,9 +649,10 @@ public class StackTester {
|
|||
}
|
||||
}
|
||||
catch(FDBException e) {
|
||||
Transaction tr = db.createTransaction();
|
||||
tr.onError(e).join();
|
||||
return false;
|
||||
try(Transaction tr = db.createTransaction()) {
|
||||
tr.onError(e).join();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ EOF
|
|||
s.email = 'fdb-dist@apple.com'
|
||||
s.files = ["${CMAKE_SOURCE_DIR}/LICENSE", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdb.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbdirectory.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbimpl.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdblocality.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdboptions.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbsubspace.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbtuple.rb", "${CMAKE_CURRENT_SOURCE_DIR}/lib/fdbimpl_v609.rb"]
|
||||
s.homepage = 'https://www.foundationdb.org'
|
||||
s.license = 'Apache v2'
|
||||
s.add_dependency('ffi', '>= 1.1.5')
|
||||
s.license = 'Apache-2.0'
|
||||
s.add_dependency('ffi', '~> 1.1', '>= 1.1.5')
|
||||
s.required_ruby_version = '>= 1.9.3'
|
||||
s.requirements << 'These bindings require the FoundationDB client. The client can be obtained from https://www.foundationdb.org/download/.'
|
||||
end
|
||||
|
|
|
@ -15,8 +15,8 @@ EOF
|
|||
s.email = 'fdb-dist@apple.com'
|
||||
s.files = ["LICENSE", "lib/fdb.rb", "lib/fdbdirectory.rb", "lib/fdbimpl.rb", "lib/fdblocality.rb", "lib/fdboptions.rb", "lib/fdbsubspace.rb", "lib/fdbtuple.rb", "lib/fdbimpl_v609.rb"]
|
||||
s.homepage = 'https://www.foundationdb.org'
|
||||
s.license = 'Apache v2'
|
||||
s.add_dependency('ffi', '>= 1.1.5')
|
||||
s.license = 'Apache-2.0'
|
||||
s.add_dependency('ffi', '~> 1.1', '>= 1.1.5')
|
||||
s.required_ruby_version = '>= 1.9.3'
|
||||
s.requirements << 'These bindings require the FoundationDB client. The client can be obtained from https://www.foundationdb.org/download/.'
|
||||
end
|
||||
|
|
|
@ -74,6 +74,14 @@ services:
|
|||
<<: *snapshot-bindings-cmake
|
||||
|
||||
|
||||
snapshot-cmake: &snapshot-testpackages
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}"'
|
||||
|
||||
prb-testpackages:
|
||||
<<: *snapshot-testpackages
|
||||
|
||||
|
||||
snapshot-ctest: &snapshot-ctest
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
|
||||
|
|
|
@ -151,10 +151,15 @@ if(NOT WIN32)
|
|||
set(TEST_PACKAGE_ADD_DIRECTORIES "" CACHE STRING "A ;-separated list of directories. All files within each directory will be added to the test package")
|
||||
endif()
|
||||
|
||||
function(create_test_package)
|
||||
if(WIN32)
|
||||
return()
|
||||
endif()
|
||||
# This sets up a directory with the correctness files common to all correctness packages.
|
||||
# This function should be called with the following arguments:
|
||||
#
|
||||
# - OUT_DIR the directory where files will be staged
|
||||
# - CONTEXT the type of correctness package being built (e.g. 'valgrind correctness')
|
||||
function(stage_correctness_package)
|
||||
set(oneValueArgs OUT_DIR CONTEXT)
|
||||
cmake_parse_arguments(STAGE "" "${oneValueArgs}" "" "${ARGN}")
|
||||
file(MAKE_DIRECTORY ${STAGE_OUT_DIR}/bin)
|
||||
string(LENGTH "${CMAKE_SOURCE_DIR}/tests/" base_length)
|
||||
foreach(test IN LISTS TEST_NAMES)
|
||||
if(("${TEST_TYPE_${test}}" STREQUAL "simulation") AND
|
||||
|
@ -162,12 +167,14 @@ function(create_test_package)
|
|||
(NOT ${test} MATCHES ${TEST_PACKAGE_EXCLUDE}))
|
||||
foreach(file IN LISTS TEST_FILES_${test})
|
||||
string(SUBSTRING ${file} ${base_length} -1 rel_out_file)
|
||||
set(out_file ${CMAKE_BINARY_DIR}/packages/tests/${rel_out_file})
|
||||
list(APPEND out_files ${out_file})
|
||||
set(out_file ${STAGE_OUT_DIR}/tests/${rel_out_file})
|
||||
list(APPEND test_files ${out_file})
|
||||
add_custom_command(
|
||||
OUTPUT ${out_file}
|
||||
DEPENDS ${file}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${file} ${out_file})
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${file} ${out_file}
|
||||
COMMENT "Copying ${STAGE_CONTEXT} test file ${rel_out_file}"
|
||||
)
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
|
@ -181,68 +188,83 @@ function(create_test_package)
|
|||
# SUBSTRING will fail
|
||||
set(src_dir "${src_dir}/")
|
||||
string(SUBSTRING ${src_dir} ${dir_len} -1 dest_dir)
|
||||
string(SUBSTRING ${file} ${dir_len} -1 out_file)
|
||||
list(APPEND external_files ${CMAKE_BINARY_DIR}/packages/${out_file})
|
||||
file(COPY ${file} DESTINATION ${CMAKE_BINARY_DIR}/packages/${dest_dir})
|
||||
string(SUBSTRING ${file} ${dir_len} -1 rel_out_file)
|
||||
set(out_file ${STAGE_OUT_DIR}/${rel_out_file})
|
||||
list(APPEND external_files ${out_file})
|
||||
add_custom_command(
|
||||
OUTPUT ${out_file}
|
||||
DEPENDS ${file}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${file} ${out_file}
|
||||
COMMENT "Copying ${STAGE_CONTEXT} external file ${file}"
|
||||
)
|
||||
endforeach()
|
||||
endforeach()
|
||||
if(NOT USE_VALGRIND)
|
||||
set(tar_file ${CMAKE_BINARY_DIR}/packages/correctness-${CMAKE_PROJECT_VERSION}.tar.gz)
|
||||
add_custom_command(
|
||||
OUTPUT ${tar_file}
|
||||
DEPENDS ${out_files}
|
||||
${CMAKE_BINARY_DIR}/packages/bin/fdbserver
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TestHarness.exe
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TraceLogHelper.dll
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
|
||||
${external_files}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_test
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_timeout
|
||||
COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file} ${CMAKE_BINARY_DIR}/packages/bin/fdbserver
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TestHarness.exe
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TraceLogHelper.dll
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_test
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_timeout
|
||||
${out_files}
|
||||
${external_files}
|
||||
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_BINARY_DIR}/packages/joshua_test ${CMAKE_BINARY_DIR}/packages/joshua_timeout
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/packages
|
||||
COMMENT "Package correctness archive"
|
||||
)
|
||||
add_custom_target(package_tests ALL DEPENDS ${tar_file})
|
||||
# seems make needs this dependency while this does nothing with ninja
|
||||
add_dependencies(package_tests strip_only_fdbserver TestHarness)
|
||||
endif()
|
||||
list(APPEND package_files ${STAGE_OUT_DIR}/bin/fdbserver
|
||||
${STAGE_OUT_DIR}/bin/TestHarness.exe
|
||||
${STAGE_OUT_DIR}/bin/TraceLogHelper.dll
|
||||
${STAGE_OUT_DIR}/CMakeCache.txt
|
||||
)
|
||||
add_custom_command(
|
||||
OUTPUT ${package_files}
|
||||
DEPENDS ${CMAKE_BINARY_DIR}/CMakeCache.txt
|
||||
${CMAKE_BINARY_DIR}/packages/bin/fdbserver
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TestHarness.exe
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TraceLogHelper.dll
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/CMakeCache.txt ${STAGE_OUT_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/packages/bin/fdbserver
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TestHarness.exe
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TraceLogHelper.dll
|
||||
${STAGE_OUT_DIR}/bin
|
||||
COMMENT "Copying files for ${STAGE_CONTEXT} package"
|
||||
)
|
||||
list(APPEND package_files ${test_files} ${external_files})
|
||||
set(package_files ${package_files} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
function(create_correctness_package)
|
||||
if(WIN32)
|
||||
return()
|
||||
endif()
|
||||
set(out_dir "${CMAKE_BINARY_DIR}/correctness")
|
||||
stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness")
|
||||
set(tar_file ${CMAKE_BINARY_DIR}/packages/correctness-${CMAKE_PROJECT_VERSION}.tar.gz)
|
||||
add_custom_command(
|
||||
OUTPUT ${tar_file}
|
||||
DEPENDS ${package_files}
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
|
||||
${out_dir}/joshua_test
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
|
||||
${out_dir}/joshua_timeout
|
||||
COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file} *
|
||||
WORKING_DIRECTORY ${out_dir}
|
||||
COMMENT "Package correctness archive"
|
||||
)
|
||||
add_custom_target(package_tests ALL DEPENDS ${tar_file})
|
||||
add_dependencies(package_tests strip_only_fdbserver TestHarness)
|
||||
endfunction()
|
||||
|
||||
function(create_valgrind_correctness_package)
|
||||
if(WIN32)
|
||||
return()
|
||||
endif()
|
||||
if(USE_VALGRIND)
|
||||
set(out_dir "${CMAKE_BINARY_DIR}/valgrind_correctness")
|
||||
stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "valgrind correctness")
|
||||
set(tar_file ${CMAKE_BINARY_DIR}/packages/valgrind-${CMAKE_PROJECT_VERSION}.tar.gz)
|
||||
add_custom_command(
|
||||
OUTPUT ${tar_file}
|
||||
DEPENDS ${out_files}
|
||||
${CMAKE_BINARY_DIR}/packages/bin/fdbserver
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TestHarness.exe
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TraceLogHelper.dll
|
||||
DEPENDS ${package_files}
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/valgrindTest.sh
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/valgrindTimeout.sh
|
||||
${external_files}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/valgrindTest.sh
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_test
|
||||
${out_dir}/joshua_test
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/valgrindTimeout.sh
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_timeout
|
||||
COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file}
|
||||
${CMAKE_BINARY_DIR}/packages/bin/fdbserver
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TestHarness.exe
|
||||
${CMAKE_BINARY_DIR}/packages/bin/TraceLogHelper.dll
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_test
|
||||
${CMAKE_BINARY_DIR}/packages/joshua_timeout
|
||||
${out_files}
|
||||
${external_files}
|
||||
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_BINARY_DIR}/packages/joshua_test ${CMAKE_BINARY_DIR}/packages/joshua_timeout
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/packages
|
||||
COMMENT "Package correctness archive"
|
||||
${out_dir}/joshua_timeout
|
||||
COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file} *
|
||||
WORKING_DIRECTORY ${out_dir}
|
||||
COMMENT "Package valgrind correctness archive"
|
||||
)
|
||||
add_custom_target(package_valgrind_tests ALL DEPENDS ${tar_file})
|
||||
add_dependencies(package_valgrind_tests strip_only_fdbserver TestHarness)
|
||||
|
@ -262,7 +284,8 @@ function(package_bindingtester)
|
|||
set(outfiles ${bdir}/fdbcli ${bdir}/fdbserver ${bdir}/${fdbcName} ${bdir}/joshua_test ${bdir}/joshua_timeout)
|
||||
add_custom_command(
|
||||
OUTPUT ${outfiles}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/packages/bin/fdbcli
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/CMakeCache.txt
|
||||
${CMAKE_BINARY_DIR}/packages/bin/fdbcli
|
||||
${CMAKE_BINARY_DIR}/packages/bin/fdbserver
|
||||
${CMAKE_BINARY_DIR}/packages/lib/${fdbcName}
|
||||
${bdir}
|
||||
|
@ -270,7 +293,7 @@ function(package_bindingtester)
|
|||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/bindingTimeout.sh ${bdir}/joshua_timeout
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/localClusterStart.sh ${bdir}/localClusterStart.sh
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/bindingTestScript.sh ${bdir}/bindingTestScript.sh
|
||||
COMMENT "Copy executes to bindingtester dir")
|
||||
COMMENT "Copy executables and scripts to bindingtester dir")
|
||||
file(GLOB_RECURSE test_files ${CMAKE_SOURCE_DIR}/bindings/*)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_BINARY_DIR}/bindingtester.touch"
|
||||
|
|
|
@ -209,6 +209,25 @@ else()
|
|||
# -mavx
|
||||
# -msse4.2)
|
||||
|
||||
# Tentatively re-enabling vector instructions
|
||||
set(USE_AVX512F OFF CACHE BOOL "Enable AVX 512F instructions")
|
||||
if (USE_AVX512F)
|
||||
add_compile_options(-mavx512f)
|
||||
endif()
|
||||
set(USE_AVX ON CACHE BOOL "Enable AVX instructions")
|
||||
if (USE_AVX)
|
||||
add_compile_options(-mavx)
|
||||
endif()
|
||||
|
||||
# Intentionally using builtin memcpy. G++ does a good job on small memcpy's when the size is known at runtime.
|
||||
# If the size is not known, then it falls back on the memcpy that's available at runtime (rte_memcpy, as of this
|
||||
# writing; see flow.cpp).
|
||||
#
|
||||
# The downside of the builtin memcpy is that it's slower at large copies, so if we spend a lot of time on large
|
||||
# copies of sizes that are known at compile time, this might not be a win. See the output of performance/memcpy
|
||||
# for more information.
|
||||
#add_compile_options(-fno-builtin-memcpy)
|
||||
|
||||
if (USE_VALGRIND)
|
||||
add_compile_options(-DVALGRIND -DUSE_VALGRIND)
|
||||
endif()
|
||||
|
@ -254,7 +273,6 @@ else()
|
|||
endif()
|
||||
if (GCC)
|
||||
add_compile_options(-Wno-pragmas)
|
||||
|
||||
# Otherwise `state [[maybe_unused]] int x;` will issue a warning.
|
||||
# https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns-incorrectly-that-attribute-is
|
||||
add_compile_options(-Wno-attributes)
|
||||
|
@ -268,6 +286,7 @@ else()
|
|||
-fvisibility=hidden
|
||||
-Wreturn-type
|
||||
-fPIC)
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-Wclass-memaccess>)
|
||||
if (GPERFTOOLS_FOUND AND GCC)
|
||||
add_compile_options(
|
||||
-fno-builtin-malloc
|
||||
|
|
|
@ -493,7 +493,7 @@ If a process has had more than 10 TCP segments retransmitted in the last 5 secon
|
|||
10.0.4.1:4500 ( 3% cpu; 2% machine; 0.004 Gbps; 0% disk; REXMIT! 2.5 GB / 4.1 GB RAM )
|
||||
|
||||
Machine-readable status
|
||||
--------------------------------
|
||||
-----------------------
|
||||
|
||||
The status command can provide a complete summary of statistics about the cluster and the database with the ``json`` argument. Full documentation for ``status json`` output can be found :doc:`here <mr-status>`.
|
||||
From the output of ``status json``, operators can find useful health metrics to determine whether or not their cluster is hitting performance limits.
|
||||
|
@ -505,6 +505,72 @@ Durable version lag ``cluster.qos.worst_durability_lag_storage_server`` cont
|
|||
Transaction log queue ``cluster.qos.worst_queue_bytes_log_server`` contains the maximum size in bytes of the mutations stored on a transaction log that have not yet been popped by storage servers. A large transaction log queue size can potentially cause the ratekeeper to increase throttling.
|
||||
====================== ==============================================================================================================
|
||||
|
||||
Server-side latency band tracking
|
||||
---------------------------------
|
||||
|
||||
As part of the status document, ``status json`` provides some sampled latency metrics obtained by running probe transactions internally. While this can often be useful, it does not necessarily reflect the distribution of latencies for requests originated by clients.
|
||||
|
||||
FoundationDB additionally provides optional functionality to measure the latencies of all incoming get read version (GRV), read, and commit requests and report some basic details about those requests. The latencies are measured from the time the server receives the request to the point when it replies, and will therefore not include time spent in transit between the client and server or delays in the client process itself.
|
||||
|
||||
The latency band tracking works by configuring various latency thresholds and counting the number of requests that occur in each band (i.e. between two consecutive thresholds). For example, if you wanted to define a service-level objective (SLO) for your cluster where 99.9% of read requests were answered within N seconds, you could set a read latency threshold at N. You could then count the number of requests below and above the threshold and determine whether the required percentage of requests are answered sufficiently quickly.
|
||||
|
||||
Configuration of server-side latency bands is performed by setting the ``\xff\x02/latencyBandConfig`` key to a string encoding the following JSON document::
|
||||
|
||||
{
|
||||
"get_read_version" : {
|
||||
"bands" : [ 0.01, 0.1]
|
||||
},
|
||||
"read" : {
|
||||
"bands" : [ 0.01, 0.1],
|
||||
"max_key_selector_offset" : 1000,
|
||||
"max_read_bytes" : 1000000
|
||||
},
|
||||
"commit" : {
|
||||
"bands" : [ 0.01, 0.1],
|
||||
"max_commit_bytes" : 1000000
|
||||
}
|
||||
}
|
||||
|
||||
Every field in this configuration is optional, and any missing fields will be left unset (i.e. no bands will be tracked or limits will not apply). The configuration takes the following arguments:
|
||||
|
||||
* ``bands`` - a list of thresholds (in seconds) to be measured for the given request type (``get_read_version``, ``read``, or ``commit``)
|
||||
* ``max_key_selector_offset`` - an integer specifying the maximum key selector offset a read request can have and still be counted
|
||||
* ``max_read_bytes`` - an integer specifying the maximum size in bytes of a read response that will be counted
|
||||
* ``max_commit_bytes`` - an integer specifying the maximum size in bytes of a commit request that will be counted
|
||||
|
||||
Setting this configuration key to a value that changes the configuration will result in the cluster controller server process logging a ``LatencyBandConfigChanged`` event. This event will indicate whether a configuration is present or not using its ``Present`` field. Specifying an invalid configuration will result in the latency band feature being unconfigured, and the server process running the cluster controller will log a ``InvalidLatencyBandConfiguration`` trace event.
|
||||
|
||||
.. note:: GRV requests are counted only at default and immediate priority. Batch priority GRV requests are ignored for the purposes of latency band tracking.
|
||||
|
||||
When configured, the ``status json`` output will include additional fields to report the number of requests in each latency band located at ``cluster.processes.<ID>.roles[N].*_latency_bands``::
|
||||
|
||||
"grv_latency_bands" : {
|
||||
0.01: 10,
|
||||
0.1: 0,
|
||||
inf: 1,
|
||||
filtered: 0
|
||||
},
|
||||
"read_latency_bands" : {
|
||||
0.01: 12,
|
||||
0.1: 1,
|
||||
inf: 0,
|
||||
filtered: 0
|
||||
},
|
||||
"commit_latency_bands" : {
|
||||
0.01: 5,
|
||||
0.1: 5,
|
||||
inf: 2,
|
||||
filtered: 1
|
||||
}
|
||||
|
||||
The ``grv_latency_bands`` and ``commit_latency_bands`` objects will only be logged for ``proxy`` roles, and ``read_latency_bands`` will only be logged for storage roles. Each threshold is represented as a key in the map, and its associated value will be the total number of requests in the lifetime of the process with a latency smaller than the threshold but larger than the next smaller threshold.
|
||||
|
||||
For example, ``0.1: 1`` in ``read_latency_bands`` indicates that there has been 1 read request with a latency in the range ``[0.01, 0.1)``. For the smallest specified threshold, the lower bound is 0 (e.g. ``[0, 0.01)`` in the example above). Requests that took longer than any defined latency band will be reported in the ``inf`` (infinity) band. Requests that were filtered by the configuration (e.g. using ``max_read_bytes``) are reported in the ``filtered`` category.
|
||||
|
||||
Because each threshold reports latencies strictly in the range between the next lower threshold and itself, it may be necessary to sum up the counts for multiple bands to determine the total number of requests below a certain threshold.
|
||||
|
||||
.. note:: No history of request counts is recorded for processes that ran in the past. This includes the history prior to restart for a process that has been restarted, for which the counts get reset to 0. For this reason, it is recommended that you collect this information periodically if you need to be able to track requests from such processes.
|
||||
|
||||
.. _administration_fdbmonitor:
|
||||
|
||||
``fdbmonitor`` and ``fdbserver``
|
||||
|
|
|
@ -10,38 +10,38 @@ macOS
|
|||
|
||||
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
|
||||
|
||||
* `FoundationDB-6.3.0.pkg <https://www.foundationdb.org/downloads/6.3.0/macOS/installers/FoundationDB-6.3.0.pkg>`_
|
||||
* `FoundationDB-6.3.1.pkg <https://www.foundationdb.org/downloads/6.3.1/macOS/installers/FoundationDB-6.3.1.pkg>`_
|
||||
|
||||
Ubuntu
|
||||
------
|
||||
|
||||
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
|
||||
|
||||
* `foundationdb-clients-6.3.0-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.0/ubuntu/installers/foundationdb-clients_6.3.0-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.3.0-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.0/ubuntu/installers/foundationdb-server_6.3.0-1_amd64.deb>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.1-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.1/ubuntu/installers/foundationdb-clients_6.3.1-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.3.1-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.1/ubuntu/installers/foundationdb-server_6.3.1-1_amd64.deb>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL6
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
|
||||
|
||||
* `foundationdb-clients-6.3.0-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.0/rhel6/installers/foundationdb-clients-6.3.0-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.0-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.0/rhel6/installers/foundationdb-server-6.3.0-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.1-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.1/rhel6/installers/foundationdb-clients-6.3.1-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.1-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.1/rhel6/installers/foundationdb-server-6.3.1-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL7
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
|
||||
|
||||
* `foundationdb-clients-6.3.0-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.0/rhel7/installers/foundationdb-clients-6.3.0-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.0-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.0/rhel7/installers/foundationdb-server-6.3.0-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.1-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.1/rhel7/installers/foundationdb-clients-6.3.1-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.1-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.1/rhel7/installers/foundationdb-server-6.3.1-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
Windows
|
||||
-------
|
||||
|
||||
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
|
||||
|
||||
* `foundationdb-6.3.0-x64.msi <https://www.foundationdb.org/downloads/6.3.0/windows/installers/foundationdb-6.3.0-x64.msi>`_
|
||||
* `foundationdb-6.3.1-x64.msi <https://www.foundationdb.org/downloads/6.3.1/windows/installers/foundationdb-6.3.1-x64.msi>`_
|
||||
|
||||
API Language Bindings
|
||||
=====================
|
||||
|
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
|
|||
|
||||
If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package:
|
||||
|
||||
* `foundationdb-6.3.0.tar.gz <https://www.foundationdb.org/downloads/6.3.0/bindings/python/foundationdb-6.3.0.tar.gz>`_
|
||||
* `foundationdb-6.3.1.tar.gz <https://www.foundationdb.org/downloads/6.3.1/bindings/python/foundationdb-6.3.1.tar.gz>`_
|
||||
|
||||
Ruby 1.9.3/2.0.0+
|
||||
-----------------
|
||||
|
||||
* `fdb-6.3.0.gem <https://www.foundationdb.org/downloads/6.3.0/bindings/ruby/fdb-6.3.0.gem>`_
|
||||
* `fdb-6.3.1.gem <https://www.foundationdb.org/downloads/6.3.1/bindings/ruby/fdb-6.3.1.gem>`_
|
||||
|
||||
Java 8+
|
||||
-------
|
||||
|
||||
* `fdb-java-6.3.0.jar <https://www.foundationdb.org/downloads/6.3.0/bindings/java/fdb-java-6.3.0.jar>`_
|
||||
* `fdb-java-6.3.0-javadoc.jar <https://www.foundationdb.org/downloads/6.3.0/bindings/java/fdb-java-6.3.0-javadoc.jar>`_
|
||||
* `fdb-java-6.3.1.jar <https://www.foundationdb.org/downloads/6.3.1/bindings/java/fdb-java-6.3.1.jar>`_
|
||||
* `fdb-java-6.3.1-javadoc.jar <https://www.foundationdb.org/downloads/6.3.1/bindings/java/fdb-java-6.3.1-javadoc.jar>`_
|
||||
|
||||
Go 1.11+
|
||||
--------
|
||||
|
|
|
@ -2,6 +2,15 @@
|
|||
Release Notes
|
||||
#############
|
||||
|
||||
6.2.22
|
||||
======
|
||||
|
||||
Fixes
|
||||
-----
|
||||
|
||||
* Coordinator class processes could be recruited as the cluster controller. `(PR #3282) <https://github.com/apple/foundationdb/pull/3282>`_
|
||||
* HTTPS requests made by backup would fail (introduced in 6.2.21). `(PR #3284) <https://github.com/apple/foundationdb/pull/3284>`_
|
||||
|
||||
6.2.21
|
||||
======
|
||||
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
#############
|
||||
Release Notes
|
||||
#############
|
||||
|
||||
6.3.1
|
||||
=====
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
* Added the ability to set arbitrary tags on transactions. Tags can be specifically throttled using ``fdbcli``, and certain types of tags can be automatically throttled by ratekeeper. `(PR #2942) <https://github.com/apple/foundationdb/pull/2942>`_
|
||||
* Add an option for transactions to report conflicting keys by calling ``getRange`` with the special key prefix ``\xff\xff/transaction/conflicting_keys/``. `(PR #2257) <https://github.com/apple/foundationdb/pull/2257>`_
|
||||
* Added the ``exclude failed`` command to ``fdbcli``. This command designates that a process is dead and will never come back, so the transaction logs can forget about mutations sent to that process. `(PR #1955) <https://github.com/apple/foundationdb/pull/1955>`_
|
||||
* A new fast restore system that can restore a database to a point in time from backup files. It is a Spark-like parallel processing framework that processes backup data asynchronously, in parallel and in pipeline. `(Fast Restore Project) <https://github.com/apple/foundationdb/projects/7>`_
|
||||
* Added backup workers for pulling mutations from transaction logs and uploading them to blob storage. Switching from the previous backup implementation will double a cluster's maximum write bandwidth. `(PR #1625) <https://github.com/apple/foundationdb/pull/1625>`_ `(PR #2588) <https://github.com/apple/foundationdb/pull/2588>`_ `(PR #2642) <https://github.com/apple/foundationdb/pull/2642>`_
|
||||
* Added a new API in all bindings that can be used to query the estimated byte size of a given range. `(PR #2537) <https://github.com/apple/foundationdb/pull/2537>`_
|
||||
* Added the ``lock`` and ``unlock`` commands to ``fdbcli`` which lock or unlock a cluster. `(PR #2890) <https://github.com/apple/foundationdb/pull/2890>`_
|
||||
* Add a framework which helps to add client functions using special keys (keys within ``[\xff\xff, \xff\xff\xff)``). `(PR #2662) <https://github.com/apple/foundationdb/pull/2662>`_
|
||||
|
||||
Performance
|
||||
-----------
|
||||
|
||||
* Improved the client's load balancing algorithm so that each proxy processes an equal number of requests. `(PR #2520) <https://github.com/apple/foundationdb/pull/2520>`_
|
||||
* Significantly reduced the amount of work done on the cluster controller by removing the centralized failure monitoring. `(PR #2518) <https://github.com/apple/foundationdb/pull/2518>`_
|
||||
* Improved master recovery speeds by more efficiently broadcasting the recovery state between processes. `(PR #2941) <https://github.com/apple/foundationdb/pull/2941>`_
|
||||
* Significantly reduced the number of network connections opened to the coordinators. `(PR #3069) <https://github.com/apple/foundationdb/pull/3069>`_
|
||||
* Improve GRV tail latencies, particularly as the transaction rate gets nearer the ratekeeper limit. `(PR #2735) <https://github.com/apple/foundationdb/pull/2735>`_
|
||||
* The proxies are now more responsive to changes in workload when unthrottling lower priority transactions. `(PR #2735) <https://github.com/apple/foundationdb/pull/2735>`_
|
||||
* Removed a lot of unnecessary copying across the codebase. `(PR #2986) <https://github.com/apple/foundationdb/pull/2986>`_ `(PR #2915) <https://github.com/apple/foundationdb/pull/2915>`_ `(PR #3024) <https://github.com/apple/foundationdb/pull/3024>`_ `(PR #2999) <https://github.com/apple/foundationdb/pull/2999>`_
|
||||
* Optimized the performance of the storage server. `(PR #1988) <https://github.com/apple/foundationdb/pull/1988>`_ `(PR #3103) <https://github.com/apple/foundationdb/pull/3103>`_
|
||||
* Optimized the performance of the resolver. `(PR #2648) <https://github.com/apple/foundationdb/pull/2648>`_
|
||||
* Replaced most uses of hashlittle2 with crc32 for better performance. `(PR #2538) <https://github.com/apple/foundationdb/pull/2538>`_
|
||||
* Significantly reduced the serialized size of conflict ranges and single key clears. `(PR #2513) <https://github.com/apple/foundationdb/pull/2513>`_
|
||||
* Improved range read performance when the reads overlap recently cleared key ranges. `(PR #2028) <https://github.com/apple/foundationdb/pull/2028>`_
|
||||
* Reduced the number of comparisons used by various map implementations. `(PR #2882) <https://github.com/apple/foundationdb/pull/2882>`_
|
||||
* Reduced the serialized size of empty strings. `(PR #3063) <https://github.com/apple/foundationdb/pull/3063>`_
|
||||
* Reduced the serialized size of various interfaces by 10x. `(PR #3068) <https://github.com/apple/foundationdb/pull/3068>`_
|
||||
|
||||
Reliability
|
||||
-----------
|
||||
|
||||
* Connections that disconnect frequently are not immediately marked available. `(PR #2932) <https://github.com/apple/foundationdb/pull/2932>`_
|
||||
* The data distributor will consider storage servers that are continually lagging behind as if they were failed. `(PR #2917) <https://github.com/apple/foundationdb/pull/2917>`_
|
||||
* Changing the storage engine type of a cluster will no longer cause the cluster to run out of memory. Instead, the cluster will gracefully migrate storage server processes to the new storage engine one by one. `(PR #1985) <https://github.com/apple/foundationdb/pull/1985>`_
|
||||
* Batch priority transactions which are being throttled by ratekeeper will get a ``batch_transaction_throttled`` error instead of hanging indefinitely. `(PR #1868) <https://github.com/apple/foundationdb/pull/1868>`_
|
||||
* Avoid using too much memory on the transaction logs when multiple types of transaction logs exist in the same process. `(PR #2213) <https://github.com/apple/foundationdb/pull/2213>`_
|
||||
|
||||
Fixes
|
||||
-----
|
||||
|
||||
* The ``SetVersionstampedKey`` atomic operation no longer conflicts with versions smaller than the current read version of the transaction. `(PR #2557) <https://github.com/apple/foundationdb/pull/2557>`_
|
||||
* Ratekeeper would measure durability lag a few seconds higher than reality. `(PR #2499) <https://github.com/apple/foundationdb/pull/2499>`_
|
||||
* In very rare scenarios, the data distributor process could get stuck in an infinite loop. `(PR #2228) <https://github.com/apple/foundationdb/pull/2228>`_
|
||||
* If the number of configured transaction logs were reduced at the exact same time a change to the system keyspace took place, it was possible for the transaction state store to become corrupted. `(PR #3051) <https://github.com/apple/foundationdb/pull/3051>`_
|
||||
* Fix multiple data races between threads on the client. `(PR #3026) <https://github.com/apple/foundationdb/pull/3026>`_
|
||||
* Transaction logs configured to spill by reference had an unintended delay between each spilled batch. `(PR #3153) <https://github.com/apple/foundationdb/pull/3153>`_
|
||||
* Added guards to honor ``DISABLE_POSIX_KERNEL_AIO``. `(PR #2888) <https://github.com/apple/foundationdb/pull/2888>`_
|
||||
|
||||
Status
|
||||
------
|
||||
|
||||
* A process's ``memory.available_bytes`` can no longer exceed the memory limit of the process. For purposes of this statistic, processes on the same machine will be allocated memory proportionally based on the size of their memory limits. `(PR #3174) <https://github.com/apple/foundationdb/pull/3174>`_
|
||||
* Replaced ``cluster.database_locked`` status field with ``cluster.database_lock_state``, which contains two subfields: ``locked`` (boolean) and ``lock_uid`` (which contains the database lock uid if the database is locked). `(PR #2058) <https://github.com/apple/foundationdb/pull/2058>`_
|
||||
* Removed fields ``worst_version_lag_storage_server`` and ``limiting_version_lag_storage_server`` from the ``cluster.qos`` section. The ``worst_data_lag_storage_server`` and ``limiting_data_lag_storage_server`` objects can be used instead. `(PR #3196) <https://github.com/apple/foundationdb/pull/3196>`_
|
||||
* If a process is unable to flush trace logs to disk, the problem will now be reported via the output of ``status`` command inside ``fdbcli``. `(PR #2605) <https://github.com/apple/foundationdb/pull/2605>`_ `(PR #2820) <https://github.com/apple/foundationdb/pull/2820>`_
|
||||
|
||||
Bindings
|
||||
--------
|
||||
|
||||
* API version updated to 630. See the :ref:`API version upgrade guide <api-version-upgrade-guide-630>` for upgrade details.
|
||||
* Python: The ``@fdb.transactional`` decorator will now throw an error if the decorated function returns a generator. `(PR #1724) <https://github.com/apple/foundationdb/pull/1724>`_
|
||||
* Java: Add caching for various JNI objects to improve performance. `(PR #2809) <https://github.com/apple/foundationdb/pull/2809>`_
|
||||
* Java: Optimize byte array comparisons in ``ByteArrayUtil``. `(PR #2823) <https://github.com/apple/foundationdb/pull/2823>`_
|
||||
* Java: Add ``FDB.disableShutdownHook`` that can be used to prevent the default shutdown hook from running. Users of this new function should make sure to call ``stopNetwork`` before terminating a client process. `(PR #2635) <https://github.com/apple/foundationdb/pull/2635>`_
|
||||
* Java: Introduced ``keyAfter`` utility function that can be used to create the immediate next key for a given byte array. `(PR #2458) <https://github.com/apple/foundationdb/pull/2458>`_
|
||||
* Golang: The ``Transact`` function will unwrap errors that have been wrapped using ``xerrors`` to determine if a retryable FoundationDB error is in the error chain. `(PR #3131) <https://github.com/apple/foundationdb/pull/3131>`_
|
||||
* Golang: Added ``Subspace.PackWithVersionstamp`` that can be used to pack a ``Tuple`` that contains a versionstamp. `(PR #2243) <https://github.com/apple/foundationdb/pull/2243>`_
|
||||
* Golang: Implement ``Stringer`` interface for ``Tuple``, ``Subspace``, ``UUID``, and ``Versionstamp``. `(PR #3032) <https://github.com/apple/foundationdb/pull/3032>`_
|
||||
* C: The ``FDBKeyValue`` struct's ``key`` and ``value`` members have changed type from ``void*`` to ``uint8_t*``. `(PR #2622) <https://github.com/apple/foundationdb/pull/2622>`_
|
||||
* Deprecated ``enable_slow_task_profiling`` network option and replaced it with ``enable_run_loop_profiling``. `(PR #2608) <https://github.com/apple/foundationdb/pull/2608>`_
|
||||
|
||||
Other Changes
|
||||
-------------
|
||||
|
||||
* Small key ranges which are being heavily read will be reported in the logs using the trace event ``ReadHotRangeLog``. `(PR #2046) <https://github.com/apple/foundationdb/pull/2046>`_ `(PR #2378) <https://github.com/apple/foundationdb/pull/2378>`_ `(PR #2532) <https://github.com/apple/foundationdb/pull/2532>`_
|
||||
* Added the read version, commit version, and datacenter locality to the client transaction information. `(PR #3079) <https://github.com/apple/foundationdb/pull/3079>`_ `(PR #3205) <https://github.com/apple/foundationdb/pull/3205>`_
|
||||
* Added a network option ``TRACE_FILE_IDENTIFIER`` that can be used to provide a custom identifier string that will be part of the file name for all trace log files created on the client. `(PR #2869) <https://github.com/apple/foundationdb/pull/2869>`_
|
||||
* It is now possible to use the ``TRACE_LOG_GROUP`` option on a client process after the database has been created. `(PR #2862) <https://github.com/apple/foundationdb/pull/2862>`_
|
||||
* Added a network option ``TRACE_CLOCK_SOURCE`` that can be used to switch the trace event timestamps to use a realtime clock source. `(PR #2329) <https://github.com/apple/foundationdb/pull/2329>`_
|
||||
* The ``INCLUDE_PORT_IN_ADDRESS`` transaction option is now on by default. This means ``get_addresses_for_key`` will always return ports in the address strings. `(PR #2639) <https://github.com/apple/foundationdb/pull/2639>`_
|
||||
* Added the ``getversion`` command to ``fdbcli`` which returns the current read version of the cluster. `(PR #2882) <https://github.com/apple/foundationdb/pull/2882>`_
|
||||
* Added the ``advanceversion`` command to ``fdbcli`` which increases the current version of a cluster. `(PR #2965) <https://github.com/apple/foundationdb/pull/2965>`_
|
||||
* Improved the slow task profiler to also report backtraces for periods when the run loop is saturated. `(PR #2608) <https://github.com/apple/foundationdb/pull/2608>`_
|
||||
* Double the number of shard locations that the client will cache locally. `(PR #2198) <https://github.com/apple/foundationdb/pull/2198>`_
|
||||
* Replaced the ``-add_prefix`` and ``-remove_prefix`` options with ``--add_prefix`` and ``--remove_prefix`` in ``fdbrestore``. `(PR #3206) <https://github.com/apple/foundationdb/pull/3206>`_
|
||||
* Data distribution metrics can now be read using the special keyspace ``\xff\xff/metrics/data_distribution_stats``. `(PR #2547) <https://github.com/apple/foundationdb/pull/2547>`_
|
||||
* The ``\xff\xff/worker_interfaces/`` keyspace now begins at a key which includes a trailing ``/`` (previously ``\xff\xff/worker_interfaces``). Range reads to this range now respect the end key passed into the range and include the keyspace prefix in the resulting keys. `(PR #3095) <https://github.com/apple/foundationdb/pull/3095>`_
|
||||
* Added FreeBSD support. `(PR #2634) <https://github.com/apple/foundationdb/pull/2634>`_
|
||||
* Updated boost to 1.72. `(PR #2684) <https://github.com/apple/foundationdb/pull/2684>`_
|
||||
|
||||
Earlier release notes
|
||||
---------------------
|
||||
* :doc:`6.2 (API Version 620) </old-release-notes/release-notes-620>`
|
||||
* :doc:`6.1 (API Version 610) </old-release-notes/release-notes-610>`
|
||||
* :doc:`6.0 (API Version 600) </old-release-notes/release-notes-600>`
|
||||
* :doc:`5.2 (API Version 520) </old-release-notes/release-notes-520>`
|
||||
* :doc:`5.1 (API Version 510) </old-release-notes/release-notes-510>`
|
||||
* :doc:`5.0 (API Version 500) </old-release-notes/release-notes-500>`
|
||||
* :doc:`4.6 (API Version 460) </old-release-notes/release-notes-460>`
|
||||
* :doc:`4.5 (API Version 450) </old-release-notes/release-notes-450>`
|
||||
* :doc:`4.4 (API Version 440) </old-release-notes/release-notes-440>`
|
||||
* :doc:`4.3 (API Version 430) </old-release-notes/release-notes-430>`
|
||||
* :doc:`4.2 (API Version 420) </old-release-notes/release-notes-420>`
|
||||
* :doc:`4.1 (API Version 410) </old-release-notes/release-notes-410>`
|
||||
* :doc:`4.0 (API Version 400) </old-release-notes/release-notes-400>`
|
||||
* :doc:`3.0 (API Version 300) </old-release-notes/release-notes-300>`
|
||||
* :doc:`2.0 (API Version 200) </old-release-notes/release-notes-200>`
|
||||
* :doc:`1.0 (API Version 100) </old-release-notes/release-notes-100>`
|
||||
* :doc:`Beta 3 (API Version 23) </old-release-notes/release-notes-023>`
|
||||
* :doc:`Beta 2 (API Version 22) </old-release-notes/release-notes-022>`
|
||||
* :doc:`Beta 1 (API Version 21) </old-release-notes/release-notes-021>`
|
||||
* :doc:`Alpha 6 (API Version 16) </old-release-notes/release-notes-016>`
|
||||
* :doc:`Alpha 5 (API Version 14) </old-release-notes/release-notes-014>`
|
|
@ -2,7 +2,7 @@
|
|||
Release Notes
|
||||
#############
|
||||
|
||||
6.3.0
|
||||
6.3.2
|
||||
=====
|
||||
|
||||
Features
|
||||
|
@ -98,8 +98,14 @@ Other Changes
|
|||
* Added FreeBSD support. `(PR #2634) <https://github.com/apple/foundationdb/pull/2634>`_
|
||||
* Updated boost to 1.72. `(PR #2684) <https://github.com/apple/foundationdb/pull/2684>`_
|
||||
|
||||
Fixes only impacting 6.3.0+
|
||||
---------------------------
|
||||
|
||||
* Renamed ``MIN_DELAY_STORAGE_CANDIDACY_SECONDS`` knob to ``MIN_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS``. [6.3.2] `(PR #3327) <https://github.com/apple/foundationdb/pull/3327>`_
|
||||
|
||||
Earlier release notes
|
||||
---------------------
|
||||
* :doc:`6.3 (API Version 630) </old-release-notes/release-notes-630>`
|
||||
* :doc:`6.2 (API Version 620) </old-release-notes/release-notes-620>`
|
||||
* :doc:`6.1 (API Version 610) </old-release-notes/release-notes-610>`
|
||||
* :doc:`6.0 (API Version 600) </old-release-notes/release-notes-600>`
|
||||
|
|
|
@ -955,7 +955,7 @@ static void printBackupUsage(bool devhelp) {
|
|||
printf(" -e ERRORLIMIT The maximum number of errors printed by status (default is 10).\n");
|
||||
printf(" -k KEYS List of key ranges to backup.\n"
|
||||
" If not specified, the entire database will be backed up.\n");
|
||||
printf(" -p, --partitioned_log Starts with new type of backup system using partitioned logs.\n");
|
||||
printf(" --partitioned_log_experimental Starts with new type of backup system using partitioned logs.\n");
|
||||
printf(" -n, --dryrun For backup start or restore start, performs a trial run with no actual changes made.\n");
|
||||
printf(" --log Enables trace file logging for the CLI session.\n"
|
||||
" --logdir PATH Specifes the output directory for trace files. If\n"
|
||||
|
|
|
@ -247,14 +247,11 @@ class FileBackupAgent : public BackupAgentBase {
|
|||
public:
|
||||
FileBackupAgent();
|
||||
|
||||
FileBackupAgent( FileBackupAgent&& r ) BOOST_NOEXCEPT :
|
||||
subspace( std::move(r.subspace) ),
|
||||
config( std::move(r.config) ),
|
||||
lastRestorable( std::move(r.lastRestorable) ),
|
||||
taskBucket( std::move(r.taskBucket) ),
|
||||
futureBucket( std::move(r.futureBucket) ) {}
|
||||
FileBackupAgent(FileBackupAgent&& r) noexcept
|
||||
: subspace(std::move(r.subspace)), config(std::move(r.config)), lastRestorable(std::move(r.lastRestorable)),
|
||||
taskBucket(std::move(r.taskBucket)), futureBucket(std::move(r.futureBucket)) {}
|
||||
|
||||
void operator=( FileBackupAgent&& r ) BOOST_NOEXCEPT {
|
||||
void operator=(FileBackupAgent&& r) noexcept {
|
||||
subspace = std::move(r.subspace);
|
||||
config = std::move(r.config);
|
||||
lastRestorable = std::move(r.lastRestorable),
|
||||
|
@ -381,19 +378,13 @@ public:
|
|||
DatabaseBackupAgent();
|
||||
explicit DatabaseBackupAgent(Database src);
|
||||
|
||||
DatabaseBackupAgent( DatabaseBackupAgent&& r ) BOOST_NOEXCEPT :
|
||||
subspace( std::move(r.subspace) ),
|
||||
states( std::move(r.states) ),
|
||||
config( std::move(r.config) ),
|
||||
errors( std::move(r.errors) ),
|
||||
ranges( std::move(r.ranges) ),
|
||||
tagNames( std::move(r.tagNames) ),
|
||||
taskBucket( std::move(r.taskBucket) ),
|
||||
futureBucket( std::move(r.futureBucket) ),
|
||||
sourceStates( std::move(r.sourceStates) ),
|
||||
sourceTagNames( std::move(r.sourceTagNames) ) {}
|
||||
DatabaseBackupAgent(DatabaseBackupAgent&& r) noexcept
|
||||
: subspace(std::move(r.subspace)), states(std::move(r.states)), config(std::move(r.config)),
|
||||
errors(std::move(r.errors)), ranges(std::move(r.ranges)), tagNames(std::move(r.tagNames)),
|
||||
taskBucket(std::move(r.taskBucket)), futureBucket(std::move(r.futureBucket)),
|
||||
sourceStates(std::move(r.sourceStates)), sourceTagNames(std::move(r.sourceTagNames)) {}
|
||||
|
||||
void operator=( DatabaseBackupAgent&& r ) BOOST_NOEXCEPT {
|
||||
void operator=(DatabaseBackupAgent&& r) noexcept {
|
||||
subspace = std::move(r.subspace);
|
||||
states = std::move(r.states);
|
||||
config = std::move(r.config);
|
||||
|
@ -883,7 +874,7 @@ public:
|
|||
}
|
||||
TraceEvent t(SevWarn, "FileBackupError");
|
||||
t.error(e).detail("BackupUID", uid).detail("Description", details).detail("TaskInstance", (uint64_t)taskInstance);
|
||||
// These should not happen
|
||||
// key_not_found could happen
|
||||
if(e.code() == error_code_key_not_found)
|
||||
t.backtrace();
|
||||
|
||||
|
|
|
@ -20,6 +20,11 @@
|
|||
|
||||
#ifndef DatabaseContext_h
|
||||
#define DatabaseContext_h
|
||||
#include "flow/FastAlloc.h"
|
||||
#include "flow/FastRef.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "flow/genericactors.actor.h"
|
||||
#include <vector>
|
||||
#pragma once
|
||||
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
|
@ -44,7 +49,25 @@ private:
|
|||
StorageServerInfo( DatabaseContext *cx, StorageServerInterface const& interf, LocalityData const& locality ) : cx(cx), ReferencedInterface<StorageServerInterface>(interf, locality) {}
|
||||
};
|
||||
|
||||
typedef MultiInterface<ReferencedInterface<StorageServerInterface>> LocationInfo;
|
||||
struct LocationInfo : MultiInterface<ReferencedInterface<StorageServerInterface>>, FastAllocated<LocationInfo> {
|
||||
using Locations = MultiInterface<ReferencedInterface<StorageServerInterface>>;
|
||||
explicit LocationInfo(const std::vector<Reference<ReferencedInterface<StorageServerInterface>>>& v)
|
||||
: Locations(v)
|
||||
{}
|
||||
LocationInfo(const std::vector<Reference<ReferencedInterface<StorageServerInterface>>>& v, bool hasCaches)
|
||||
: Locations(v)
|
||||
, hasCaches(hasCaches)
|
||||
{}
|
||||
LocationInfo(const LocationInfo&) = delete;
|
||||
LocationInfo(LocationInfo&&) = delete;
|
||||
LocationInfo& operator=(const LocationInfo&) = delete;
|
||||
LocationInfo& operator=(LocationInfo&&) = delete;
|
||||
bool hasCaches = false;
|
||||
Reference<Locations> locations() {
|
||||
return Reference<Locations>::addRef(this);
|
||||
}
|
||||
};
|
||||
|
||||
typedef ModelInterface<MasterProxyInterface> ProxyInfo;
|
||||
|
||||
class ClientTagThrottleData : NonCopyable {
|
||||
|
@ -131,7 +154,7 @@ public:
|
|||
|
||||
Database clone() const { return Database(new DatabaseContext( connectionFile, clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, internal, apiVersion, switchable )); }
|
||||
|
||||
std::pair<KeyRange,Reference<LocationInfo>> getCachedLocation( const KeyRef&, bool isBackward = false );
|
||||
std::pair<KeyRange, Reference<LocationInfo>> getCachedLocation( const KeyRef&, bool isBackward = false );
|
||||
bool getCachedLocations( const KeyRangeRef&, vector<std::pair<KeyRange,Reference<LocationInfo>>>&, int limit, bool reverse );
|
||||
Reference<LocationInfo> setCachedLocation( const KeyRangeRef&, const vector<struct StorageServerInterface>& );
|
||||
void invalidateCache( const KeyRef&, bool isBackward = false );
|
||||
|
@ -200,11 +223,13 @@ public:
|
|||
bool enableLocalityLoadBalance;
|
||||
|
||||
struct VersionRequest {
|
||||
SpanID spanContext;
|
||||
Promise<GetReadVersionReply> reply;
|
||||
TagSet tags;
|
||||
Optional<UID> debugID;
|
||||
|
||||
VersionRequest(TagSet tags = TagSet(), Optional<UID> debugID = Optional<UID>()) : tags(tags), debugID(debugID) {}
|
||||
VersionRequest(SpanID spanContext, TagSet tags = TagSet(), Optional<UID> debugID = Optional<UID>())
|
||||
: spanContext(spanContext), tags(tags), debugID(debugID) {}
|
||||
};
|
||||
|
||||
// Transaction start request batching
|
||||
|
@ -232,7 +257,7 @@ public:
|
|||
|
||||
// Cache of location information
|
||||
int locationCacheSize;
|
||||
CoalescedKeyRangeMap< Reference<LocationInfo> > locationCache;
|
||||
CoalescedKeyRangeMap<Reference<LocationInfo>> locationCache;
|
||||
|
||||
std::map< UID, StorageServerInfo* > server_interf;
|
||||
|
||||
|
@ -314,7 +339,8 @@ public:
|
|||
double detailedHealthMetricsLastUpdated;
|
||||
|
||||
UniqueOrderedOptionList<FDBTransactionOptions> transactionDefaults;
|
||||
|
||||
Future<Void> cacheListMonitor;
|
||||
AsyncTrigger updateCache;
|
||||
std::vector<std::unique_ptr<SpecialKeyRangeBaseImpl>> specialKeySpaceModules;
|
||||
std::unique_ptr<SpecialKeySpace> specialKeySpace;
|
||||
void registerSpecialKeySpaceModule(SpecialKeySpace::MODULE module, std::unique_ptr<SpecialKeyRangeBaseImpl> impl);
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/flow.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
|
||||
|
@ -35,6 +36,7 @@ typedef uint64_t Sequence;
|
|||
typedef StringRef KeyRef;
|
||||
typedef StringRef ValueRef;
|
||||
typedef int64_t Generation;
|
||||
typedef UID SpanID;
|
||||
|
||||
enum {
|
||||
tagLocalitySpecial = -1,
|
||||
|
@ -77,6 +79,10 @@ struct Tag {
|
|||
serializer(ar, locality, id);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct flow_ref<Tag> : std::integral_constant<bool, false> {};
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
template <class Ar> void load( Ar& ar, Tag& tag ) { tag.serialize_unversioned(ar); }
|
||||
|
|
|
@ -235,7 +235,7 @@ public:
|
|||
}
|
||||
TraceEvent t(SevWarn, "FileRestoreError");
|
||||
t.error(e).detail("RestoreUID", uid).detail("Description", details).detail("TaskInstance", (uint64_t)taskInstance);
|
||||
// These should not happen
|
||||
// key_not_found could happen
|
||||
if(e.code() == error_code_key_not_found)
|
||||
t.backtrace();
|
||||
|
||||
|
@ -3580,33 +3580,38 @@ public:
|
|||
// Parallel restore
|
||||
ACTOR static Future<Void> parallelRestoreFinish(Database cx, UID randomUID) {
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||
state Future<Void> watchForRestoreRequestDone;
|
||||
state bool restoreDone = false;
|
||||
state Optional<Value> restoreRequestDoneKeyValue;
|
||||
TraceEvent("FastRestoreAgentWaitForRestoreToFinish").detail("DBLock", randomUID);
|
||||
// TODO: register watch first and then check if the key exist
|
||||
loop {
|
||||
try {
|
||||
tr.reset();
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
Optional<Value> restoreRequestDoneKeyValue = wait(tr.get(restoreRequestDoneKey));
|
||||
Optional<Value> _restoreRequestDoneKeyValue = wait(tr.get(restoreRequestDoneKey));
|
||||
restoreRequestDoneKeyValue = _restoreRequestDoneKeyValue;
|
||||
// Restore may finish before restoreAgent waits on the restore finish event.
|
||||
if (restoreRequestDoneKeyValue.present()) {
|
||||
restoreDone = true; // In case commit clears the key but in unknown_state
|
||||
tr.clear(restoreRequestDoneKey);
|
||||
wait(tr.commit());
|
||||
break;
|
||||
} else if (!restoreDone) {
|
||||
watchForRestoreRequestDone = tr.watch(restoreRequestDoneKey);
|
||||
} else {
|
||||
state Future<Void> watchForRestoreRequestDone = tr.watch(restoreRequestDoneKey);
|
||||
wait(tr.commit());
|
||||
wait(watchForRestoreRequestDone);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
TraceEvent("FastRestoreAgentRestoreFinished")
|
||||
.detail("ClearRestoreRequestDoneKey", restoreRequestDoneKeyValue.present());
|
||||
// Only this agent can clear the restoreRequestDoneKey
|
||||
wait(runRYWTransaction(cx, [](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr->clear(restoreRequestDoneKey);
|
||||
return Void();
|
||||
}));
|
||||
|
||||
TraceEvent("FastRestoreAgentRestoreFinished").detail("UnlockDBStart", randomUID);
|
||||
try {
|
||||
wait(unlockDatabase(cx, randomUID));
|
||||
|
@ -3671,18 +3676,18 @@ public:
|
|||
TraceEvent("FastRestoreAgentSubmitRestoreRequests").detail("DBIsLocked", randomUID);
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("FastRestoreAgentSubmitRestoreRequests").detail("CheckLockError", e.what());
|
||||
TraceEvent(numTries > 50 ? SevError : SevWarnAlways, "FastRestoreMayFail")
|
||||
TraceEvent(numTries > 50 ? SevError : SevWarnAlways, "FastRestoreAgentSubmitRestoreRequestsMayFail")
|
||||
.detail("Reason", "DB is not properly locked")
|
||||
.detail("ExpectedLockID", randomUID);
|
||||
.detail("ExpectedLockID", randomUID)
|
||||
.error(e);
|
||||
numTries++;
|
||||
wait(delay(5.0));
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
// set up restore request
|
||||
tr->reset();
|
||||
loop {
|
||||
tr->reset();
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
try {
|
||||
|
@ -4444,7 +4449,10 @@ public:
|
|||
return r;
|
||||
}
|
||||
|
||||
ACTOR static Future<Version> restore(FileBackupAgent* backupAgent, Database cx, Optional<Database> cxOrig, Key tagName, Key url, Standalone<VectorRef<KeyRangeRef>> ranges, bool waitForComplete, Version targetVersion, bool verbose, Key addPrefix, Key removePrefix, bool lockDB, UID randomUid) {
|
||||
ACTOR static Future<Version> restore(FileBackupAgent* backupAgent, Database cx, Optional<Database> cxOrig,
|
||||
Key tagName, Key url, Standalone<VectorRef<KeyRangeRef>> ranges,
|
||||
bool waitForComplete, Version targetVersion, bool verbose, Key addPrefix,
|
||||
Key removePrefix, bool lockDB, UID randomUid) {
|
||||
state Reference<IBackupContainer> bc = IBackupContainer::openContainer(url.toString());
|
||||
|
||||
state BackupDescription desc = wait(bc->describeBackup());
|
||||
|
|
|
@ -352,6 +352,9 @@ namespace HTTP {
|
|||
send_start = timer();
|
||||
|
||||
loop {
|
||||
wait(conn->onWritable());
|
||||
wait( delay( 0, TaskPriority::WriteSocket ) );
|
||||
|
||||
// If we already got a response, before finishing sending the request, then close the connection,
|
||||
// set the Connection header to "close" as a hint to the caller that this connection can't be used
|
||||
// again, and break out of the send loop.
|
||||
|
@ -372,11 +375,6 @@ namespace HTTP {
|
|||
pContent->sent(len);
|
||||
if(pContent->empty())
|
||||
break;
|
||||
|
||||
if(len == 0) {
|
||||
wait(conn->onWritable());
|
||||
wait( delay( 0, TaskPriority::WriteSocket ) );
|
||||
}
|
||||
}
|
||||
|
||||
wait(responseReading);
|
||||
|
|
|
@ -36,7 +36,10 @@ template <class Val, class Metric=int, class MetricFunc = ConstantMetric<Metric>
|
|||
class KeyRangeMap : public RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>, NonCopyable, public ReferenceCounted<KeyRangeMap<Val>> {
|
||||
public:
|
||||
explicit KeyRangeMap(Val v=Val(), Key endKey = allKeys.end) : RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>(endKey, v), mapEnd(endKey) {}
|
||||
void operator=(KeyRangeMap&& r) BOOST_NOEXCEPT { mapEnd = std::move(r.mapEnd); RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>::operator=(std::move(r)); }
|
||||
void operator=(KeyRangeMap&& r) noexcept {
|
||||
mapEnd = std::move(r.mapEnd);
|
||||
RangeMap<Key, Val, KeyRangeRef, Metric, MetricFunc>::operator=(std::move(r));
|
||||
}
|
||||
void insert( const KeyRangeRef& keys, const Val& value ) { RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>::insert(keys, value); }
|
||||
void insert( const KeyRef& key, const Val& value ) { RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>::insert( singleKeyRange(key), value); }
|
||||
std::vector<KeyRangeWith<Val>> getAffectedRangesAfterInsertion( const KeyRangeRef& keys, const Val &insertionValue = Val());
|
||||
|
@ -67,7 +70,10 @@ template <class Val, class Metric=int, class MetricFunc = ConstantMetric<Metric>
|
|||
class CoalescedKeyRefRangeMap : public RangeMap<KeyRef,Val,KeyRangeRef,Metric,MetricFunc>, NonCopyable {
|
||||
public:
|
||||
explicit CoalescedKeyRefRangeMap(Val v=Val(), Key endKey = allKeys.end) : RangeMap<KeyRef,Val,KeyRangeRef,Metric,MetricFunc>(endKey, v), mapEnd(endKey) {}
|
||||
void operator=(CoalescedKeyRefRangeMap&& r) BOOST_NOEXCEPT { mapEnd = std::move(r.mapEnd); RangeMap<KeyRef, Val, KeyRangeRef,Metric,MetricFunc>::operator=(std::move(r)); }
|
||||
void operator=(CoalescedKeyRefRangeMap&& r) noexcept {
|
||||
mapEnd = std::move(r.mapEnd);
|
||||
RangeMap<KeyRef, Val, KeyRangeRef, Metric, MetricFunc>::operator=(std::move(r));
|
||||
}
|
||||
void insert( const KeyRangeRef& keys, const Val& value );
|
||||
void insert( const KeyRef& key, const Val& value, Arena& arena );
|
||||
Key mapEnd;
|
||||
|
@ -77,7 +83,10 @@ template <class Val, class Metric=int, class MetricFunc = ConstantMetric<Metric>
|
|||
class CoalescedKeyRangeMap : public RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>, NonCopyable {
|
||||
public:
|
||||
explicit CoalescedKeyRangeMap(Val v=Val(), Key endKey = allKeys.end) : RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>(endKey, v), mapEnd(endKey) {}
|
||||
void operator=(CoalescedKeyRangeMap&& r) BOOST_NOEXCEPT { mapEnd = std::move(r.mapEnd); RangeMap<Key,Val,KeyRangeRef,Metric,MetricFunc>::operator=(std::move(r)); }
|
||||
void operator=(CoalescedKeyRangeMap&& r) noexcept {
|
||||
mapEnd = std::move(r.mapEnd);
|
||||
RangeMap<Key, Val, KeyRangeRef, Metric, MetricFunc>::operator=(std::move(r));
|
||||
}
|
||||
void insert( const KeyRangeRef& keys, const Val& value );
|
||||
void insert( const KeyRef& key, const Val& value );
|
||||
Key mapEnd;
|
||||
|
|
|
@ -19,7 +19,12 @@
|
|||
*/
|
||||
|
||||
#include <cinttypes>
|
||||
#include <vector>
|
||||
|
||||
#include "flow/Arena.h"
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/ReadYourWrites.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
|
||||
#include "fdbclient/SystemData.h"
|
||||
|
@ -1858,6 +1863,69 @@ ACTOR Future<Void> waitForPrimaryDC( Database cx, StringRef dcId ) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> changeCachedRange(Database cx, KeyRangeRef range, bool add) {
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||
state KeyRange sysRange = KeyRangeRef(storageCacheKey(range.begin), storageCacheKey(range.end));
|
||||
state KeyRange sysRangeClear = KeyRangeRef(storageCacheKey(range.begin), keyAfter(storageCacheKey(range.end)));
|
||||
state KeyRange privateRange = KeyRangeRef(cacheKeysKey(0, range.begin), cacheKeysKey(0, range.end));
|
||||
state Value trueValue = storageCacheValue(std::vector<uint16_t>{ 0 });
|
||||
state Value falseValue = storageCacheValue(std::vector<uint16_t>{});
|
||||
loop {
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
tr.clear(sysRangeClear);
|
||||
tr.clear(privateRange);
|
||||
tr.addReadConflictRange(privateRange);
|
||||
Standalone<RangeResultRef> previous =
|
||||
wait(tr.getRange(KeyRangeRef(storageCachePrefix, sysRange.begin), 1, true));
|
||||
bool prevIsCached = false;
|
||||
if (!previous.empty()) {
|
||||
std::vector<uint16_t> prevVal;
|
||||
decodeStorageCacheValue(previous[0].value, prevVal);
|
||||
prevIsCached = !prevVal.empty();
|
||||
}
|
||||
if (prevIsCached && !add) {
|
||||
// we need to uncache from here
|
||||
tr.set(sysRange.begin, falseValue);
|
||||
tr.set(privateRange.begin, serverKeysFalse);
|
||||
} else if (!prevIsCached && add) {
|
||||
// we need to cache, starting from here
|
||||
tr.set(sysRange.begin, trueValue);
|
||||
tr.set(privateRange.begin, serverKeysTrue);
|
||||
}
|
||||
Standalone<RangeResultRef> after =
|
||||
wait(tr.getRange(KeyRangeRef(sysRange.end, storageCacheKeys.end), 1, false));
|
||||
bool afterIsCached = false;
|
||||
if (!after.empty()) {
|
||||
std::vector<uint16_t> afterVal;
|
||||
decodeStorageCacheValue(after[0].value, afterVal);
|
||||
afterIsCached = afterVal.empty();
|
||||
}
|
||||
if (afterIsCached && !add) {
|
||||
tr.set(sysRange.end, trueValue);
|
||||
tr.set(privateRange.end, serverKeysTrue);
|
||||
} else if (!afterIsCached && add) {
|
||||
tr.set(sysRange.end, falseValue);
|
||||
tr.set(privateRange.end, serverKeysFalse);
|
||||
}
|
||||
wait(tr.commit());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
state Error err = e;
|
||||
wait(tr.onError(err));
|
||||
TraceEvent(SevDebug, "ChangeCachedRangeError").error(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Future<Void> addCachedRange(const Database& cx, KeyRangeRef range) {
|
||||
return changeCachedRange(cx, range, true);
|
||||
}
|
||||
Future<Void> removeCachedRange(const Database& cx, KeyRangeRef range) {
|
||||
return changeCachedRange(cx, range, false);
|
||||
}
|
||||
|
||||
json_spirit::Value_type normJSONType(json_spirit::Value_type type) {
|
||||
if (type == json_spirit::int_type)
|
||||
return json_spirit::real_type;
|
||||
|
|
|
@ -201,5 +201,8 @@ bool schemaMatch( json_spirit::mValue const& schema, json_spirit::mValue const&
|
|||
// storage nodes
|
||||
ACTOR Future<Void> mgmtSnapCreate(Database cx, Standalone<StringRef> snapCmd, UID snapUID);
|
||||
|
||||
Future<Void> addCachedRange(const Database& cx, KeyRangeRef range);
|
||||
Future<Void> removeCachedRange(const Database& cx, KeyRangeRef range);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
||||
|
|
|
@ -153,6 +153,7 @@ struct CommitTransactionRequest : TimedRequest {
|
|||
bool firstInBatch() const { return (flags & FLAG_FIRST_IN_BATCH) != 0; }
|
||||
|
||||
Arena arena;
|
||||
SpanID spanContext;
|
||||
CommitTransactionRef transaction;
|
||||
ReplyPromise<CommitID> reply;
|
||||
uint32_t flags;
|
||||
|
@ -162,7 +163,7 @@ struct CommitTransactionRequest : TimedRequest {
|
|||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, transaction, reply, arena, flags, debugID);
|
||||
serializer(ar, transaction, reply, arena, flags, debugID, spanContext);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -209,6 +210,7 @@ struct GetReadVersionRequest : TimedRequest {
|
|||
FLAG_PRIORITY_MASK = PRIORITY_SYSTEM_IMMEDIATE,
|
||||
};
|
||||
|
||||
SpanID spanContext;
|
||||
uint32_t transactionCount;
|
||||
uint32_t flags;
|
||||
TransactionPriority priority;
|
||||
|
@ -219,9 +221,11 @@ struct GetReadVersionRequest : TimedRequest {
|
|||
ReplyPromise<GetReadVersionReply> reply;
|
||||
|
||||
GetReadVersionRequest() : transactionCount(1), flags(0) {}
|
||||
GetReadVersionRequest(uint32_t transactionCount, TransactionPriority priority, uint32_t flags = 0, TransactionTagMap<uint32_t> tags = TransactionTagMap<uint32_t>(), Optional<UID> debugID = Optional<UID>())
|
||||
: transactionCount(transactionCount), priority(priority), flags(flags), tags(tags), debugID(debugID)
|
||||
{
|
||||
GetReadVersionRequest(SpanID spanContext, uint32_t transactionCount, TransactionPriority priority,
|
||||
uint32_t flags = 0, TransactionTagMap<uint32_t> tags = TransactionTagMap<uint32_t>(),
|
||||
Optional<UID> debugID = Optional<UID>())
|
||||
: spanContext(spanContext), transactionCount(transactionCount), priority(priority), flags(flags), tags(tags),
|
||||
debugID(debugID) {
|
||||
flags = flags & ~FLAG_PRIORITY_MASK;
|
||||
switch(priority) {
|
||||
case TransactionPriority::BATCH:
|
||||
|
@ -237,12 +241,12 @@ struct GetReadVersionRequest : TimedRequest {
|
|||
ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool operator < (GetReadVersionRequest const& rhs) const { return priority < rhs.priority; }
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, transactionCount, flags, tags, debugID, reply);
|
||||
serializer(ar, transactionCount, flags, tags, debugID, reply, spanContext);
|
||||
|
||||
if(ar.isDeserializing) {
|
||||
if((flags & PRIORITY_SYSTEM_IMMEDIATE) == PRIORITY_SYSTEM_IMMEDIATE) {
|
||||
|
@ -275,6 +279,7 @@ struct GetKeyServerLocationsReply {
|
|||
struct GetKeyServerLocationsRequest {
|
||||
constexpr static FileIdentifier file_identifier = 9144680;
|
||||
Arena arena;
|
||||
SpanID spanContext;
|
||||
KeyRef begin;
|
||||
Optional<KeyRef> end;
|
||||
int limit;
|
||||
|
@ -282,24 +287,28 @@ struct GetKeyServerLocationsRequest {
|
|||
ReplyPromise<GetKeyServerLocationsReply> reply;
|
||||
|
||||
GetKeyServerLocationsRequest() : limit(0), reverse(false) {}
|
||||
GetKeyServerLocationsRequest( KeyRef const& begin, Optional<KeyRef> const& end, int limit, bool reverse, Arena const& arena ) : begin( begin ), end( end ), limit( limit ), reverse( reverse ), arena( arena ) {}
|
||||
|
||||
template <class Ar>
|
||||
GetKeyServerLocationsRequest(SpanID spanContext, KeyRef const& begin, Optional<KeyRef> const& end, int limit,
|
||||
bool reverse, Arena const& arena)
|
||||
: spanContext(spanContext), begin(begin), end(end), limit(limit), reverse(reverse), arena(arena) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, begin, end, limit, reverse, reply, arena);
|
||||
serializer(ar, begin, end, limit, reverse, reply, spanContext, arena);
|
||||
}
|
||||
};
|
||||
|
||||
struct GetRawCommittedVersionRequest {
|
||||
constexpr static FileIdentifier file_identifier = 12954034;
|
||||
SpanID spanContext;
|
||||
Optional<UID> debugID;
|
||||
ReplyPromise<GetReadVersionReply> reply;
|
||||
|
||||
explicit GetRawCommittedVersionRequest(Optional<UID> const& debugID = Optional<UID>()) : debugID(debugID) {}
|
||||
explicit GetRawCommittedVersionRequest(SpanID spanContext, Optional<UID> const& debugID = Optional<UID>()) : spanContext(spanContext), debugID(debugID) {}
|
||||
explicit GetRawCommittedVersionRequest() : spanContext(), debugID() {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, debugID, reply);
|
||||
serializer(ar, debugID, reply, spanContext);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -20,14 +20,23 @@
|
|||
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <regex>
|
||||
#include <unordered_set>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbrpc/FailureMonitor.h"
|
||||
#include "fdbrpc/MultiInterface.h"
|
||||
|
||||
#include "fdbclient/Atomic.h"
|
||||
#include "fdbclient/ClusterInterface.h"
|
||||
#include "fdbclient/CoordinationInterface.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
|
@ -38,18 +47,23 @@
|
|||
#include "fdbclient/SpecialKeySpace.actor.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/versions.h"
|
||||
#include "fdbrpc/LoadBalance.h"
|
||||
#include "fdbrpc/Net2FileSystem.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/DeterministicRandom.h"
|
||||
#include "flow/Error.h"
|
||||
#include "flow/flow.h"
|
||||
#include "flow/genericactors.actor.h"
|
||||
#include "flow/Knobs.h"
|
||||
#include "flow/Platform.h"
|
||||
#include "flow/SystemMonitor.h"
|
||||
#include "flow/TLSConfig.actor.h"
|
||||
#include "flow/Tracing.h"
|
||||
#include "flow/UnitTest.h"
|
||||
|
||||
#include "fdbclient/versions.h"
|
||||
#include "flow/serialize.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
|
@ -67,6 +81,33 @@ using std::max;
|
|||
using std::min;
|
||||
using std::pair;
|
||||
|
||||
namespace {
|
||||
|
||||
ACTOR template <class T, class Fun>
|
||||
Future<T> runAfter(Future<T> in, Fun func) {
|
||||
T res = wait(in);
|
||||
return func(res);
|
||||
}
|
||||
|
||||
template <class Interface, class Request>
|
||||
Future<REPLY_TYPE(Request)> loadBalance(
|
||||
DatabaseContext* ctx, const Reference<LocationInfo> alternatives, RequestStream<Request> Interface::*channel,
|
||||
const Request& request = Request(), TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
|
||||
bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
|
||||
QueueModel* model = NULL) {
|
||||
if (alternatives->hasCaches) {
|
||||
return loadBalance(alternatives->locations(), channel, request, taskID, atMostOnce, model);
|
||||
}
|
||||
return runAfter(loadBalance(alternatives->locations(), channel, request, taskID, atMostOnce, model),
|
||||
[ctx](auto res) {
|
||||
if (res.cached) {
|
||||
ctx->updateCache.trigger();
|
||||
}
|
||||
return res;
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
NetworkOptions networkOptions;
|
||||
TLSConfig tlsConfig(TLSEndpointType::CLIENT);
|
||||
|
||||
|
@ -454,6 +495,166 @@ ACTOR static Future<Void> monitorMasterProxiesChange(Reference<AsyncVar<ClientDB
|
|||
}
|
||||
}
|
||||
|
||||
void updateLocationCacheWithCaches(DatabaseContext* self, const std::map<UID, StorageServerInterface>& removed,
|
||||
const std::map<UID, StorageServerInterface>& added) {
|
||||
// TODO: this needs to be more clever in the future
|
||||
auto ranges = self->locationCache.ranges();
|
||||
for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
|
||||
if (iter->value() && iter->value()->hasCaches) {
|
||||
auto& val = iter->value();
|
||||
std::vector<Reference<ReferencedInterface<StorageServerInterface>>> interfaces;
|
||||
interfaces.reserve(val->size() - removed.size() + added.size());
|
||||
for (int i = 0; i < val->size(); ++i) {
|
||||
const auto& interf = (*val)[i];
|
||||
if (removed.count(interf->interf.id()) == 0) {
|
||||
interfaces.emplace_back(interf);
|
||||
}
|
||||
}
|
||||
for (const auto& p : added) {
|
||||
interfaces.emplace_back(Reference<ReferencedInterface<StorageServerInterface>>{new ReferencedInterface<StorageServerInterface>{p.second}});
|
||||
}
|
||||
iter->value() = Reference<LocationInfo>{ new LocationInfo(interfaces, true) };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference<LocationInfo> addCaches(const Reference<LocationInfo>& loc,
|
||||
const std::vector<Reference<ReferencedInterface<StorageServerInterface>>>& other) {
|
||||
std::vector<Reference<ReferencedInterface<StorageServerInterface>>> interfaces;
|
||||
interfaces.reserve(loc->size() + other.size());
|
||||
for (int i = 0; i < loc->size(); ++i) {
|
||||
interfaces.emplace_back((*loc)[i]);
|
||||
}
|
||||
interfaces.insert(interfaces.end(), other.begin(), other.end());
|
||||
return Reference<LocationInfo>{ new LocationInfo{ interfaces, true } };
|
||||
}
|
||||
|
||||
ACTOR Future<Void> updateCachedRanges(DatabaseContext* self, std::map<UID, StorageServerInterface>* cacheServers) {
|
||||
state Database db(self);
|
||||
state ReadYourWritesTransaction tr(db);
|
||||
state Value trueValue = storageCacheValue(std::vector<uint16_t>{ 0 });
|
||||
state Value falseValue = storageCacheValue(std::vector<uint16_t>{});
|
||||
try {
|
||||
loop {
|
||||
wait(self->updateCache.onTrigger());
|
||||
tr.reset();
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
|
||||
try {
|
||||
Standalone<RangeResultRef> range = wait(tr.getRange(storageCacheKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!range.more);
|
||||
std::vector<Reference<ReferencedInterface<StorageServerInterface>>> cacheInterfaces;
|
||||
cacheInterfaces.reserve(cacheServers->size());
|
||||
for (const auto& p : *cacheServers) {
|
||||
cacheInterfaces.emplace_back(Reference<ReferencedInterface<StorageServerInterface>>{
|
||||
new ReferencedInterface<StorageServerInterface>{ p.second } });
|
||||
}
|
||||
bool currCached = false;
|
||||
KeyRef begin, end;
|
||||
for (const auto& kv : range) {
|
||||
// These booleans have to flip consistently
|
||||
ASSERT(currCached == (kv.value == falseValue));
|
||||
if (kv.value == trueValue) {
|
||||
begin = kv.key.substr(storageCacheKeys.begin.size());
|
||||
currCached = true;
|
||||
} else {
|
||||
currCached = false;
|
||||
end = kv.key.substr(storageCacheKeys.begin.size());
|
||||
KeyRangeRef cachedRange{begin, end};
|
||||
auto ranges = self->locationCache.containedRanges(cachedRange);
|
||||
KeyRef containedRangesBegin, containedRangesEnd, prevKey;
|
||||
if (!ranges.empty()) {
|
||||
containedRangesBegin = ranges.begin().range().begin;
|
||||
}
|
||||
for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
|
||||
// We probably don't want to do the code below? Otherwise we would never
|
||||
// fetch the corresponding storages - which would give us a different semantics
|
||||
//if (containedRangesEnd > iter->range().begin) {
|
||||
// self->locationCache.insert(
|
||||
// KeyRangeRef{ containedRangesEnd, iter->range().begin },
|
||||
// Reference<LocationInfo>{ new LocationInfo{ cacheInterfaces, true } });
|
||||
//}
|
||||
containedRangesEnd = iter->range().end;
|
||||
if (iter->value() && !iter->value()->hasCaches) {
|
||||
iter->value() = addCaches(iter->value(), cacheInterfaces);
|
||||
}
|
||||
}
|
||||
auto iter = self->locationCache.rangeContaining(begin);
|
||||
if (iter->value() && !iter->value()->hasCaches) {
|
||||
if (end>=iter->range().end) {
|
||||
self->locationCache.insert(KeyRangeRef{ begin, iter->range().end },
|
||||
addCaches(iter->value(), cacheInterfaces));
|
||||
} else {
|
||||
self->locationCache.insert(KeyRangeRef{ begin, end },
|
||||
addCaches(iter->value(), cacheInterfaces));
|
||||
}
|
||||
}
|
||||
iter = self->locationCache.rangeContainingKeyBefore(end);
|
||||
if (iter->value() && !iter->value()->hasCaches) {
|
||||
self->locationCache.insert(KeyRangeRef{iter->range().begin, end}, addCaches(iter->value(), cacheInterfaces));
|
||||
}
|
||||
}
|
||||
}
|
||||
wait(delay(2.0)); // we want to wait at least some small amount of time before
|
||||
// updating this list again
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "UpdateCachedRangesFailed")
|
||||
.error(e);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> monitorCacheList(DatabaseContext* self) {
|
||||
state Database db(self);
|
||||
state Transaction tr(db);
|
||||
state std::map<UID, StorageServerInterface> cacheServerMap;
|
||||
state Future<Void> updateRanges = updateCachedRanges(self, &cacheServerMap);
|
||||
// if no caches are configured, we don't want to run this actor at all
|
||||
// so we just wait for the first trigger from a storage server
|
||||
wait(self->updateCache.onTrigger());
|
||||
try {
|
||||
loop {
|
||||
tr.reset();
|
||||
try {
|
||||
Standalone<RangeResultRef> cacheList =
|
||||
wait(tr.getRange(storageCacheServerKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!cacheList.more);
|
||||
bool hasChanges = false;
|
||||
std::map<UID, StorageServerInterface> allCacheServers;
|
||||
for (auto kv : cacheList) {
|
||||
auto ssi = BinaryReader::fromStringRef<StorageServerInterface>(kv.value, IncludeVersion());
|
||||
allCacheServers.emplace(ssi.id(), ssi);
|
||||
}
|
||||
std::map<UID, StorageServerInterface> newCacheServers;
|
||||
std::map<UID, StorageServerInterface> deletedCacheServers;
|
||||
std::set_difference(allCacheServers.begin(), allCacheServers.end(), cacheServerMap.begin(),
|
||||
cacheServerMap.end(),
|
||||
std::insert_iterator<std::map<UID, StorageServerInterface>>(
|
||||
newCacheServers, newCacheServers.begin()));
|
||||
std::set_difference(cacheServerMap.begin(), cacheServerMap.end(), allCacheServers.begin(),
|
||||
allCacheServers.end(),
|
||||
std::insert_iterator<std::map<UID, StorageServerInterface>>(
|
||||
deletedCacheServers, deletedCacheServers.begin()));
|
||||
hasChanges = !(newCacheServers.empty() && deletedCacheServers.empty());
|
||||
if (hasChanges) {
|
||||
updateLocationCacheWithCaches(self, deletedCacheServers, newCacheServers);
|
||||
}
|
||||
cacheServerMap = std::move(allCacheServers);
|
||||
wait(delay(5.0));
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "MonitorCacheListFailed").error(e);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<HealthMetrics> getHealthMetricsActor(DatabaseContext *cx, bool detailed) {
|
||||
if (now() - cx->healthMetricsLastUpdated < CLIENT_KNOBS->AGGREGATE_HEALTH_METRICS_MAX_STALENESS) {
|
||||
if (detailed) {
|
||||
|
@ -600,6 +801,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
|
|||
|
||||
monitorMasterProxiesInfoChange = monitorMasterProxiesChange(clientInfo, &masterProxiesChangeTrigger);
|
||||
clientStatusUpdater.actor = clientStatusUpdateActor(this);
|
||||
cacheListMonitor = monitorCacheList(this);
|
||||
if (apiVersionAtLeast(630)) {
|
||||
registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, std::make_unique<ConflictingKeysImpl>(conflictingKeysRange));
|
||||
registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, std::make_unique<ReadConflictRangeImpl>(readConflictRangeKeysRange));
|
||||
|
@ -681,14 +883,15 @@ Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, F
|
|||
}
|
||||
|
||||
DatabaseContext::~DatabaseContext() {
|
||||
cacheListMonitor.cancel();
|
||||
monitorMasterProxiesInfoChange.cancel();
|
||||
for(auto it = server_interf.begin(); it != server_interf.end(); it = server_interf.erase(it))
|
||||
it->second->notifyContextDestroyed();
|
||||
ASSERT_ABORT( server_interf.empty() );
|
||||
locationCache.insert( allKeys, Reference<LocationInfo>() );
|
||||
locationCache.insert(allKeys, Reference<LocationInfo>());
|
||||
}
|
||||
|
||||
pair<KeyRange,Reference<LocationInfo>> DatabaseContext::getCachedLocation( const KeyRef& key, bool isBackward ) {
|
||||
pair<KeyRange, Reference<LocationInfo>> DatabaseContext::getCachedLocation( const KeyRef& key, bool isBackward ) {
|
||||
if( isBackward ) {
|
||||
auto range = locationCache.rangeContainingKeyBefore(key);
|
||||
return std::make_pair(range->range(), range->value());
|
||||
|
@ -740,23 +943,24 @@ Reference<LocationInfo> DatabaseContext::setCachedLocation( const KeyRangeRef& k
|
|||
attempts++;
|
||||
auto r = locationCache.randomRange();
|
||||
Key begin = r.begin(), end = r.end(); // insert invalidates r, so can't be passed a mere reference into it
|
||||
locationCache.insert( KeyRangeRef(begin, end), Reference<LocationInfo>() );
|
||||
locationCache.insert(KeyRangeRef(begin, end), Reference<LocationInfo>());
|
||||
}
|
||||
locationCache.insert( keys, loc );
|
||||
return loc;
|
||||
}
|
||||
|
||||
void DatabaseContext::invalidateCache( const KeyRef& key, bool isBackward ) {
|
||||
if( isBackward )
|
||||
if( isBackward ) {
|
||||
locationCache.rangeContainingKeyBefore(key)->value() = Reference<LocationInfo>();
|
||||
else
|
||||
} else {
|
||||
locationCache.rangeContaining(key)->value() = Reference<LocationInfo>();
|
||||
}
|
||||
}
|
||||
|
||||
void DatabaseContext::invalidateCache( const KeyRangeRef& keys ) {
|
||||
auto rs = locationCache.intersectingRanges(keys);
|
||||
Key begin = rs.begin().begin(), end = rs.end().begin(); // insert invalidates rs, so can't be passed a mere reference into it
|
||||
locationCache.insert( KeyRangeRef(begin, end), Reference<LocationInfo>() );
|
||||
locationCache.insert(KeyRangeRef(begin, end), Reference<LocationInfo>());
|
||||
}
|
||||
|
||||
Future<Void> DatabaseContext::onMasterProxiesChanged() {
|
||||
|
@ -1335,6 +1539,7 @@ ACTOR Future<Optional<vector<StorageServerInterface>>> transactionalGetServerInt
|
|||
|
||||
//If isBackward == true, returns the shard containing the key before 'key' (an infinitely long, inexpressible key). Otherwise returns the shard containing key
|
||||
ACTOR Future< pair<KeyRange,Reference<LocationInfo>> > getKeyLocation_internal( Database cx, Key key, TransactionInfo info, bool isBackward = false ) {
|
||||
state Span span("NAPI:getKeyLocation"_loc, { info.span->context });
|
||||
if (isBackward) {
|
||||
ASSERT( key != allKeys.begin && key <= allKeys.end );
|
||||
} else {
|
||||
|
@ -1348,7 +1553,10 @@ ACTOR Future< pair<KeyRange,Reference<LocationInfo>> > getKeyLocation_internal(
|
|||
++cx->transactionKeyServerLocationRequests;
|
||||
choose {
|
||||
when ( wait( cx->onMasterProxiesChanged() ) ) {}
|
||||
when ( GetKeyServerLocationsReply rep = wait( basicLoadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
|
||||
when(GetKeyServerLocationsReply rep = wait(basicLoadBalance(
|
||||
cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations,
|
||||
GetKeyServerLocationsRequest(span->context, key, Optional<KeyRef>(), 100, isBackward, key.arena()),
|
||||
TaskPriority::DefaultPromiseEndpoint))) {
|
||||
++cx->transactionKeyServerLocationRequestsCompleted;
|
||||
if( info.debugID.present() )
|
||||
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocation.After");
|
||||
|
@ -1362,7 +1570,11 @@ ACTOR Future< pair<KeyRange,Reference<LocationInfo>> > getKeyLocation_internal(
|
|||
}
|
||||
|
||||
template <class F>
|
||||
Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation( Database const& cx, Key const& key, F StorageServerInterface::*member, TransactionInfo const& info, bool isBackward = false ) {
|
||||
Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation(Database const& cx, Key const& key,
|
||||
F StorageServerInterface::*member,
|
||||
TransactionInfo const& info,
|
||||
bool isBackward = false) {
|
||||
// we first check whether this range is cached
|
||||
auto ssi = cx->getCachedLocation( key, isBackward );
|
||||
if (!ssi.second) {
|
||||
return getKeyLocation_internal( cx, key, info, isBackward );
|
||||
|
@ -1380,6 +1592,7 @@ Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation( Database const&
|
|||
}
|
||||
|
||||
ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLocations_internal( Database cx, KeyRange keys, int limit, bool reverse, TransactionInfo info ) {
|
||||
state Span span("NAPI:getKeyRangeLocations"_loc, { info.span->context });
|
||||
if( info.debugID.present() )
|
||||
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocations.Before");
|
||||
|
||||
|
@ -1387,7 +1600,10 @@ ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLoca
|
|||
++cx->transactionKeyServerLocationRequests;
|
||||
choose {
|
||||
when ( wait( cx->onMasterProxiesChanged() ) ) {}
|
||||
when ( GetKeyServerLocationsReply _rep = wait( basicLoadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
|
||||
when(GetKeyServerLocationsReply _rep = wait(basicLoadBalance(
|
||||
cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations,
|
||||
GetKeyServerLocationsRequest(span->context, keys.begin, keys.end, limit, reverse, keys.arena()),
|
||||
TaskPriority::DefaultPromiseEndpoint))) {
|
||||
++cx->transactionKeyServerLocationRequestsCompleted;
|
||||
state GetKeyServerLocationsReply rep = _rep;
|
||||
if( info.debugID.present() )
|
||||
|
@ -1478,6 +1694,7 @@ Future<Void> Transaction::warmRange(Database cx, KeyRange keys) {
|
|||
ACTOR Future<Optional<Value>> getValue( Future<Version> version, Key key, Database cx, TransactionInfo info, Reference<TransactionLogInfo> trLogInfo, TagSet tags )
|
||||
{
|
||||
state Version ver = wait( version );
|
||||
state Span span("NAPI:getValue"_loc, { info.span->context });
|
||||
cx->validateVersion(ver);
|
||||
|
||||
loop {
|
||||
|
@ -1510,10 +1727,12 @@ ACTOR Future<Optional<Value>> getValue( Future<Version> version, Key key, Databa
|
|||
}
|
||||
choose {
|
||||
when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); }
|
||||
when(GetValueReply _reply =
|
||||
wait(loadBalance(ssi.second, &StorageServerInterface::getValue,
|
||||
GetValueRequest(key, ver, cx->sampleReadTags() ? tags : Optional<TagSet>(), getValueID), TaskPriority::DefaultPromiseEndpoint, false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
|
||||
when(GetValueReply _reply = wait(
|
||||
loadBalance(cx.getPtr(), ssi.second, &StorageServerInterface::getValue,
|
||||
GetValueRequest(span->context, key, ver,
|
||||
cx->sampleReadTags() ? tags : Optional<TagSet>(), getValueID),
|
||||
TaskPriority::DefaultPromiseEndpoint, false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
|
||||
reply = _reply;
|
||||
}
|
||||
}
|
||||
|
@ -1571,6 +1790,7 @@ ACTOR Future<Key> getKey( Database cx, KeySelector k, Future<Version> version, T
|
|||
wait(success(version));
|
||||
|
||||
state Optional<UID> getKeyID = Optional<UID>();
|
||||
state Span span("NAPI:getKey"_loc, { info.span->context });
|
||||
if( info.debugID.present() ) {
|
||||
getKeyID = nondeterministicRandom()->randomUniqueID();
|
||||
|
||||
|
@ -1599,9 +1819,11 @@ ACTOR Future<Key> getKey( Database cx, KeySelector k, Future<Version> version, T
|
|||
choose {
|
||||
when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); }
|
||||
when(GetKeyReply _reply =
|
||||
wait(loadBalance(ssi.second, &StorageServerInterface::getKey, GetKeyRequest(k, version.get(), cx->sampleReadTags() ? tags : Optional<TagSet>(), getKeyID),
|
||||
TaskPriority::DefaultPromiseEndpoint, false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
|
||||
wait(loadBalance(cx.getPtr(), ssi.second, &StorageServerInterface::getKey,
|
||||
GetKeyRequest(span->context, k, version.get(),
|
||||
cx->sampleReadTags() ? tags : Optional<TagSet>(), getKeyID),
|
||||
TaskPriority::DefaultPromiseEndpoint, false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
|
||||
reply = _reply;
|
||||
}
|
||||
}
|
||||
|
@ -1634,12 +1856,15 @@ ACTOR Future<Key> getKey( Database cx, KeySelector k, Future<Version> version, T
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Version> waitForCommittedVersion( Database cx, Version version ) {
|
||||
ACTOR Future<Version> waitForCommittedVersion( Database cx, Version version, SpanID spanContext ) {
|
||||
state Span span("NAPI:waitForCommittedVersion"_loc, { spanContext });
|
||||
try {
|
||||
loop {
|
||||
choose {
|
||||
when ( wait( cx->onMasterProxiesChanged() ) ) {}
|
||||
when ( GetReadVersionReply v = wait( basicLoadBalance( cx->getMasterProxies(false), &MasterProxyInterface::getConsistentReadVersion, GetReadVersionRequest( 0, TransactionPriority::IMMEDIATE ), cx->taskID ) ) ) {
|
||||
when(GetReadVersionReply v = wait(basicLoadBalance(
|
||||
cx->getMasterProxies(false), &MasterProxyInterface::getConsistentReadVersion,
|
||||
GetReadVersionRequest(span->context, 0, TransactionPriority::IMMEDIATE), cx->taskID))) {
|
||||
cx->minAcceptableReadVersion = std::min(cx->minAcceptableReadVersion, v.version);
|
||||
|
||||
if (v.version >= version)
|
||||
|
@ -1655,11 +1880,14 @@ ACTOR Future<Version> waitForCommittedVersion( Database cx, Version version ) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Version> getRawVersion( Database cx ) {
|
||||
ACTOR Future<Version> getRawVersion( Database cx, SpanID spanContext ) {
|
||||
state Span span("NAPI:getRawVersion"_loc, { spanContext });
|
||||
loop {
|
||||
choose {
|
||||
when ( wait( cx->onMasterProxiesChanged() ) ) {}
|
||||
when ( GetReadVersionReply v = wait( basicLoadBalance( cx->getMasterProxies(false), &MasterProxyInterface::getConsistentReadVersion, GetReadVersionRequest( 0, TransactionPriority::IMMEDIATE ), cx->taskID ) ) ) {
|
||||
when(GetReadVersionReply v =
|
||||
wait(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::getConsistentReadVersion,
|
||||
GetReadVersionRequest(spanContext, 0, TransactionPriority::IMMEDIATE), cx->taskID))) {
|
||||
return v.version;
|
||||
}
|
||||
}
|
||||
|
@ -1673,6 +1901,7 @@ ACTOR Future<Void> readVersionBatcher(
|
|||
ACTOR Future<Void> watchValue(Future<Version> version, Key key, Optional<Value> value, Database cx,
|
||||
TransactionInfo info, TagSet tags) {
|
||||
state Version ver = wait( version );
|
||||
state Span span(deterministicRandom()->randomUniqueID(), "NAPI:watchValue"_loc, { info.span->context });
|
||||
cx->validateVersion(ver);
|
||||
ASSERT(ver != latestVersion);
|
||||
|
||||
|
@ -1689,9 +1918,11 @@ ACTOR Future<Void> watchValue(Future<Version> version, Key key, Optional<Value>
|
|||
}
|
||||
state WatchValueReply resp;
|
||||
choose {
|
||||
when(WatchValueReply r = wait(loadBalance(ssi.second, &StorageServerInterface::watchValue,
|
||||
WatchValueRequest(key, value, ver, cx->sampleReadTags() ? tags : Optional<TagSet>(), watchValueID),
|
||||
TaskPriority::DefaultPromiseEndpoint))) {
|
||||
when(WatchValueReply r = wait(
|
||||
loadBalance(cx.getPtr(), ssi.second, &StorageServerInterface::watchValue,
|
||||
WatchValueRequest(span->context, key, value, ver,
|
||||
cx->sampleReadTags() ? tags : Optional<TagSet>(), watchValueID),
|
||||
TaskPriority::DefaultPromiseEndpoint))) {
|
||||
resp = r;
|
||||
}
|
||||
when(wait(cx->connectionFile ? cx->connectionFile->onChange() : Never())) { wait(Never()); }
|
||||
|
@ -1702,7 +1933,7 @@ ACTOR Future<Void> watchValue(Future<Version> version, Key key, Optional<Value>
|
|||
|
||||
//FIXME: wait for known committed version on the storage server before replying,
|
||||
//cannot do this until the storage server is notified on knownCommittedVersion changes from tlog (faster than the current update loop)
|
||||
Version v = wait(waitForCommittedVersion(cx, resp.version));
|
||||
Version v = wait(waitForCommittedVersion(cx, resp.version, span->context));
|
||||
|
||||
//TraceEvent("WatcherCommitted").detail("CommittedVersion", v).detail("WatchVersion", resp.version).detail("Key", key ).detail("Value", value);
|
||||
|
||||
|
@ -1755,6 +1986,7 @@ ACTOR Future<Standalone<RangeResultRef>> getExactRange( Database cx, Version ver
|
|||
KeyRange keys, GetRangeLimits limits, bool reverse, TransactionInfo info, TagSet tags )
|
||||
{
|
||||
state Standalone<RangeResultRef> output;
|
||||
state Span span("NAPI:getExactRange"_loc, { info.span->context });
|
||||
|
||||
//printf("getExactRange( '%s', '%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
|
||||
loop {
|
||||
|
@ -1768,6 +2000,7 @@ ACTOR Future<Standalone<RangeResultRef>> getExactRange( Database cx, Version ver
|
|||
req.version = version;
|
||||
req.begin = firstGreaterOrEqual( range.begin );
|
||||
req.end = firstGreaterOrEqual( range.end );
|
||||
req.spanContext = span->context;
|
||||
|
||||
transformRangeLimits(limits, reverse, req);
|
||||
ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);
|
||||
|
@ -1793,10 +2026,10 @@ ACTOR Future<Standalone<RangeResultRef>> getExactRange( Database cx, Version ver
|
|||
try {
|
||||
choose {
|
||||
when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); }
|
||||
when(GetKeyValuesReply _rep =
|
||||
wait(loadBalance(locations[shard].second, &StorageServerInterface::getKeyValues, req,
|
||||
TaskPriority::DefaultPromiseEndpoint, false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
|
||||
when(GetKeyValuesReply _rep = wait(
|
||||
loadBalance(cx.getPtr(), locations[shard].second, &StorageServerInterface::getKeyValues,
|
||||
req, TaskPriority::DefaultPromiseEndpoint, false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
|
||||
rep = _rep;
|
||||
}
|
||||
}
|
||||
|
@ -2012,6 +2245,7 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
|
|||
state KeySelector originalBegin = begin;
|
||||
state KeySelector originalEnd = end;
|
||||
state Standalone<RangeResultRef> output;
|
||||
state Span span("NAPI:getRange"_loc, info.span);
|
||||
|
||||
try {
|
||||
state Version version = wait( fVersion );
|
||||
|
@ -2064,6 +2298,7 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
|
|||
|
||||
req.tags = cx->sampleReadTags() ? tags : Optional<TagSet>();
|
||||
req.debugID = info.debugID;
|
||||
req.spanContext = span->context;
|
||||
try {
|
||||
if( info.debugID.present() ) {
|
||||
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getRange.Before");
|
||||
|
@ -2092,7 +2327,10 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
|
|||
transaction_too_old(), future_version()
|
||||
});
|
||||
}
|
||||
GetKeyValuesReply _rep = wait( loadBalance(beginServer.second, &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
|
||||
GetKeyValuesReply _rep =
|
||||
wait(loadBalance(cx.getPtr(), beginServer.second, &StorageServerInterface::getKeyValues, req,
|
||||
TaskPriority::DefaultPromiseEndpoint, false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
|
||||
rep = _rep;
|
||||
++cx->transactionPhysicalReadsCompleted;
|
||||
} catch(Error&) {
|
||||
|
@ -2268,7 +2506,7 @@ Transaction::~Transaction() {
|
|||
cancelWatches();
|
||||
}
|
||||
|
||||
void Transaction::operator=(Transaction&& r) BOOST_NOEXCEPT {
|
||||
void Transaction::operator=(Transaction&& r) noexcept {
|
||||
flushTrLogsIfEnabled();
|
||||
cx = std::move(r.cx);
|
||||
tr = std::move(r.tr);
|
||||
|
@ -2364,7 +2602,6 @@ void Watch::setWatch(Future<Void> watchFuture) {
|
|||
|
||||
//FIXME: This seems pretty horrible. Now a Database can't die until all of its watches do...
|
||||
ACTOR Future<Void> watch(Reference<Watch> watch, Database cx, TagSet tags, TransactionInfo info) {
|
||||
cx->addWatch();
|
||||
try {
|
||||
choose {
|
||||
// RYOW write to value that is being watched (if applicable)
|
||||
|
@ -2399,7 +2636,7 @@ ACTOR Future<Void> watch(Reference<Watch> watch, Database cx, TagSet tags, Trans
|
|||
}
|
||||
|
||||
Future<Version> Transaction::getRawReadVersion() {
|
||||
return ::getRawVersion(cx);
|
||||
return ::getRawVersion(cx, info.span->context);
|
||||
}
|
||||
|
||||
Future< Void > Transaction::watch( Reference<Watch> watch ) {
|
||||
|
@ -2753,6 +2990,7 @@ void Transaction::reset() {
|
|||
|
||||
void Transaction::fullReset() {
|
||||
reset();
|
||||
info.span = Span(info.span->location);
|
||||
backoff = CLIENT_KNOBS->DEFAULT_BACKOFF;
|
||||
}
|
||||
|
||||
|
@ -2869,6 +3107,8 @@ ACTOR void checkWrites( Database cx, Future<Void> committed, Promise<Void> outCo
|
|||
ACTOR static Future<Void> commitDummyTransaction( Database cx, KeyRange range, TransactionInfo info, TransactionOptions options ) {
|
||||
state Transaction tr(cx);
|
||||
state int retries = 0;
|
||||
state Span span("NAPI:dummyTransaction"_loc, info.span);
|
||||
tr.info.span->parents.insert(span->context);
|
||||
loop {
|
||||
try {
|
||||
TraceEvent("CommitDummyTransaction").detail("Key", range.begin).detail("Retries", retries);
|
||||
|
@ -2915,6 +3155,8 @@ void Transaction::setupWatches() {
|
|||
ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo> trLogInfo, CommitTransactionRequest req, Future<Version> readVersion, TransactionInfo info, Version* pCommittedVersion, Transaction* tr, TransactionOptions options) {
|
||||
state TraceInterval interval( "TransactionCommit" );
|
||||
state double startTime = now();
|
||||
state Span span("NAPI:tryCommit"_loc, { info.span->context });
|
||||
req.spanContext = span->context;
|
||||
if (info.debugID.present())
|
||||
TraceEvent(interval.begin()).detail( "Parent", info.debugID.get() );
|
||||
try {
|
||||
|
@ -3338,6 +3580,14 @@ void Transaction::setOption( FDBTransactionOptions::Option option, Optional<Stri
|
|||
options.readTags.addTag(value.get());
|
||||
break;
|
||||
|
||||
case FDBTransactionOptions::SPAN_PARENT:
|
||||
validateOptionValue(value, true);
|
||||
if (value.get().size() != 16) {
|
||||
throw invalid_option_value();
|
||||
}
|
||||
info.span->parents.emplace(BinaryReader::fromStringRef<UID>(value.get(), Unversioned()));
|
||||
break;
|
||||
|
||||
case FDBTransactionOptions::REPORT_CONFLICTING_KEYS:
|
||||
validateOptionValue(value, false);
|
||||
options.reportConflictingKeys = true;
|
||||
|
@ -3348,13 +3598,16 @@ void Transaction::setOption( FDBTransactionOptions::Option option, Optional<Stri
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<GetReadVersionReply> getConsistentReadVersion( DatabaseContext *cx, uint32_t transactionCount, TransactionPriority priority, uint32_t flags, TransactionTagMap<uint32_t> tags, Optional<UID> debugID ) {
|
||||
ACTOR Future<GetReadVersionReply> getConsistentReadVersion(Span parentSpan, DatabaseContext* cx, uint32_t transactionCount,
|
||||
TransactionPriority priority, uint32_t flags,
|
||||
TransactionTagMap<uint32_t> tags, Optional<UID> debugID) {
|
||||
state Span span("NAPI:getConsistentReadVersion"_loc, parentSpan);
|
||||
try {
|
||||
++cx->transactionReadVersionBatches;
|
||||
if( debugID.present() )
|
||||
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
|
||||
loop {
|
||||
state GetReadVersionRequest req( transactionCount, priority, flags, tags, debugID );
|
||||
state GetReadVersionRequest req( span->context, transactionCount, priority, flags, tags, debugID );
|
||||
choose {
|
||||
when ( wait( cx->onMasterProxiesChanged() ) ) {}
|
||||
when ( GetReadVersionReply v = wait( basicLoadBalance( cx->getMasterProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES), &MasterProxyInterface::getConsistentReadVersion, req, cx->taskID ) ) ) {
|
||||
|
@ -3405,6 +3658,7 @@ ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream<Databas
|
|||
state PromiseStream<double> replyTimes;
|
||||
state PromiseStream<Error> _errorStream;
|
||||
state double batchTime = 0;
|
||||
state Span span("NAPI:readVersionBatcher"_loc);
|
||||
loop {
|
||||
send_batch = false;
|
||||
choose {
|
||||
|
@ -3415,6 +3669,7 @@ ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream<Databas
|
|||
}
|
||||
g_traceBatch.addAttach("TransactionAttachID", req.debugID.get().first(), debugID.get().first());
|
||||
}
|
||||
span->parents.insert(req.spanContext);
|
||||
requests.push_back(req.reply);
|
||||
for(auto tag : req.tags) {
|
||||
++tags[tag];
|
||||
|
@ -3442,9 +3697,10 @@ ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream<Databas
|
|||
addActor.send(ready(timeReply(GRVReply.getFuture(), replyTimes)));
|
||||
|
||||
Future<Void> batch = incrementalBroadcastWithError(
|
||||
getConsistentReadVersion(cx, count, priority, flags, std::move(tags), std::move(debugID)),
|
||||
getConsistentReadVersion(span, cx, count, priority, flags, std::move(tags), std::move(debugID)),
|
||||
std::move(requests), CLIENT_KNOBS->BROADCAST_BATCH_SIZE);
|
||||
|
||||
span = Span("NAPI:readVersionBatcher"_loc);
|
||||
tags.clear();
|
||||
debugID = Optional<UID>();
|
||||
requests.clear();
|
||||
|
@ -3454,7 +3710,11 @@ ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream<Databas
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Version> extractReadVersion(DatabaseContext* cx, TransactionPriority priority, Reference<TransactionLogInfo> trLogInfo, Future<GetReadVersionReply> f, bool lockAware, double startTime, Promise<Optional<Value>> metadataVersion, TagSet tags) {
|
||||
ACTOR Future<Version> extractReadVersion(Span parentSpan, DatabaseContext* cx, TransactionPriority priority,
|
||||
Reference<TransactionLogInfo> trLogInfo, Future<GetReadVersionReply> f,
|
||||
bool lockAware, double startTime, Promise<Optional<Value>> metadataVersion,
|
||||
TagSet tags) {
|
||||
// parentSpan here is only used to keep the parent alive until the request completes
|
||||
GetReadVersionReply rep = wait(f);
|
||||
double latency = now() - startTime;
|
||||
cx->GRVLatencies.addSample(latency);
|
||||
|
@ -3576,10 +3836,12 @@ Future<Version> Transaction::getReadVersion(uint32_t flags) {
|
|||
batcher.actor = readVersionBatcher( cx.getPtr(), batcher.stream.getFuture(), options.priority, flags );
|
||||
}
|
||||
|
||||
auto const req = DatabaseContext::VersionRequest(options.tags, info.debugID);
|
||||
Span span("NAPI:getReadVersion"_loc, info.span);
|
||||
auto const req = DatabaseContext::VersionRequest(span->context, options.tags, info.debugID);
|
||||
batcher.stream.send(req);
|
||||
startTime = now();
|
||||
readVersion = extractReadVersion( cx.getPtr(), options.priority, trLogInfo, req.reply.getFuture(), options.lockAware, startTime, metadataVersion, options.tags);
|
||||
readVersion = extractReadVersion(span, cx.getPtr(), options.priority, trLogInfo, req.reply.getFuture(),
|
||||
options.lockAware, startTime, metadataVersion, options.tags);
|
||||
}
|
||||
return readVersion;
|
||||
}
|
||||
|
@ -3660,7 +3922,7 @@ ACTOR Future<StorageMetrics> doGetStorageMetrics(Database cx, KeyRangeRef keys,
|
|||
req.min.bytes = 0;
|
||||
req.max.bytes = -1;
|
||||
StorageMetrics m = wait(
|
||||
loadBalance(locationInfo, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution));
|
||||
loadBalance(locationInfo->locations(), &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution));
|
||||
return m;
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
|
||||
|
@ -3702,8 +3964,8 @@ ACTOR Future<Void> trackBoundedStorageMetrics(
|
|||
try {
|
||||
loop {
|
||||
WaitMetricsRequest req( keys, x - halfError, x + halfError );
|
||||
StorageMetrics nextX = wait( loadBalance( location, &StorageServerInterface::waitMetrics, req ) );
|
||||
deltaStream.send( nextX - x );
|
||||
StorageMetrics nextX = wait(loadBalance(location->locations(), &StorageServerInterface::waitMetrics, req));
|
||||
deltaStream.send(nextX - x);
|
||||
x = nextX;
|
||||
}
|
||||
} catch (Error& e) {
|
||||
|
@ -3728,8 +3990,8 @@ ACTOR Future<StorageMetrics> waitStorageMetricsMultipleLocations(
|
|||
WaitMetricsRequest req(locations[i].first, StorageMetrics(), StorageMetrics());
|
||||
req.min.bytes = 0;
|
||||
req.max.bytes = -1;
|
||||
fx[i] =
|
||||
loadBalance(locations[i].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution);
|
||||
fx[i] = loadBalance(locations[i].second->locations(), &StorageServerInterface::waitMetrics, req,
|
||||
TaskPriority::DataDistribution);
|
||||
}
|
||||
wait(waitForAll(fx));
|
||||
|
||||
|
@ -3777,7 +4039,7 @@ ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getReadHotRanges(Database cx, K
|
|||
state vector<Future<ReadHotSubRangeReply>> fReplies(nLocs);
|
||||
for (int i = 0; i < nLocs; i++) {
|
||||
ReadHotSubRangeRequest req(locations[i].first);
|
||||
fReplies[i] = loadBalance(locations[i].second, &StorageServerInterface::getReadHotRanges, req,
|
||||
fReplies[i] = loadBalance(locations[i].second->locations(), &StorageServerInterface::getReadHotRanges, req,
|
||||
TaskPriority::DataDistribution);
|
||||
}
|
||||
|
||||
|
@ -3823,7 +4085,8 @@ ACTOR Future< std::pair<Optional<StorageMetrics>, int> > waitStorageMetrics(
|
|||
fx = waitStorageMetricsMultipleLocations(locations, min, max, permittedError);
|
||||
} else {
|
||||
WaitMetricsRequest req( keys, min, max );
|
||||
fx = loadBalance( locations[0].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution );
|
||||
fx = loadBalance(locations[0].second->locations(), &StorageServerInterface::waitMetrics, req,
|
||||
TaskPriority::DataDistribution);
|
||||
}
|
||||
StorageMetrics x = wait(fx);
|
||||
return std::make_pair(x,-1);
|
||||
|
@ -3911,8 +4174,12 @@ ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx,
|
|||
state int i = 0;
|
||||
for(; i<locations.size(); i++) {
|
||||
SplitMetricsRequest req( locations[i].first, limit, used, estimated, i == locations.size() - 1 );
|
||||
SplitMetricsReply res = wait( loadBalance( locations[i].second, &StorageServerInterface::splitMetrics, req, TaskPriority::DataDistribution ) );
|
||||
if( res.splits.size() && res.splits[0] <= results.back() ) { // split points are out of order, possibly because of moving data, throw error to retry
|
||||
SplitMetricsReply res =
|
||||
wait(loadBalance(locations[i].second->locations(), &StorageServerInterface::splitMetrics, req,
|
||||
TaskPriority::DataDistribution));
|
||||
if (res.splits.size() &&
|
||||
res.splits[0] <= results.back()) { // split points are out of order, possibly because of moving
|
||||
// data, throw error to retry
|
||||
ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing
|
||||
throw all_alternatives_failed();
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
*/
|
||||
|
||||
#pragma once
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/Tracing.h"
|
||||
#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_NATIVEAPI_ACTOR_G_H)
|
||||
#define FDBCLIENT_NATIVEAPI_ACTOR_G_H
|
||||
#include "fdbclient/NativeAPI.actor.g.h"
|
||||
|
@ -77,8 +79,8 @@ public:
|
|||
Database() {} // an uninitialized database can be destructed or reassigned safely; that's it
|
||||
void operator= ( Database const& rhs ) { db = rhs.db; }
|
||||
Database( Database const& rhs ) : db(rhs.db) {}
|
||||
Database(Database&& r) BOOST_NOEXCEPT : db(std::move(r.db)) {}
|
||||
void operator= (Database&& r) BOOST_NOEXCEPT { db = std::move(r.db); }
|
||||
Database(Database&& r) noexcept : db(std::move(r.db)) {}
|
||||
void operator=(Database&& r) noexcept { db = std::move(r.db); }
|
||||
|
||||
// For internal use by the native client:
|
||||
explicit Database(Reference<DatabaseContext> cx) : db(cx) {}
|
||||
|
@ -147,13 +149,16 @@ class ReadYourWritesTransaction; // workaround cyclic dependency
|
|||
struct TransactionInfo {
|
||||
Optional<UID> debugID;
|
||||
TaskPriority taskID;
|
||||
Span span;
|
||||
bool useProvisionalProxies;
|
||||
// Used to save conflicting keys if FDBTransactionOptions::REPORT_CONFLICTING_KEYS is enabled
|
||||
// prefix/<key1> : '1' - any keys equal or larger than this key are (probably) conflicting keys
|
||||
// prefix/<key2> : '0' - any keys equal or larger than this key are (definitely) not conflicting keys
|
||||
std::shared_ptr<CoalescedKeyRangeMap<Value>> conflictingKeys;
|
||||
|
||||
explicit TransactionInfo( TaskPriority taskID ) : taskID(taskID), useProvisionalProxies(false) {}
|
||||
explicit TransactionInfo(TaskPriority taskID)
|
||||
: taskID(taskID), span(deterministicRandom()->randomUniqueID(), "Transaction"_loc), useProvisionalProxies(false) {
|
||||
}
|
||||
};
|
||||
|
||||
struct TransactionLogInfo : public ReferenceCounted<TransactionLogInfo>, NonCopyable {
|
||||
|
@ -279,7 +284,7 @@ public:
|
|||
|
||||
// These are to permit use as state variables in actors:
|
||||
Transaction() : info( TaskPriority::DefaultEndpoint ) {}
|
||||
void operator=(Transaction&& r) BOOST_NOEXCEPT;
|
||||
void operator=(Transaction&& r) noexcept;
|
||||
|
||||
void reset();
|
||||
void fullReset();
|
||||
|
@ -329,7 +334,7 @@ private:
|
|||
Future<Void> committing;
|
||||
};
|
||||
|
||||
ACTOR Future<Version> waitForCommittedVersion(Database cx, Version version);
|
||||
ACTOR Future<Version> waitForCommittedVersion(Database cx, Version version, SpanID spanContext);
|
||||
ACTOR Future<Standalone<VectorRef<DDMetricsRef>>> waitDataDistributionMetricsList(Database cx, KeyRange keys,
|
||||
int shardLimit);
|
||||
|
||||
|
|
|
@ -73,8 +73,8 @@ struct Notified {
|
|||
|
||||
void operator=(const ValueType& v) { set(v); }
|
||||
|
||||
Notified(Notified&& r) BOOST_NOEXCEPT : waiting(std::move(r.waiting)), val(std::move(r.val)) {}
|
||||
void operator=(Notified&& r) BOOST_NOEXCEPT {
|
||||
Notified(Notified&& r) noexcept : waiting(std::move(r.waiting)), val(std::move(r.val)) {}
|
||||
void operator=(Notified&& r) noexcept {
|
||||
waiting = std::move(r.waiting);
|
||||
val = std::move(r.val);
|
||||
}
|
||||
|
|
|
@ -1119,8 +1119,7 @@ public:
|
|||
}
|
||||
|
||||
bool retry_limit_hit = ryw->options.maxRetries != -1 && ryw->retries >= ryw->options.maxRetries;
|
||||
if (ryw->retries < std::numeric_limits<int>::max())
|
||||
ryw->retries++;
|
||||
if (ryw->retries < std::numeric_limits<int>::max()) ryw->retries++;
|
||||
if(retry_limit_hit) {
|
||||
throw e;
|
||||
}
|
||||
|
@ -1130,7 +1129,7 @@ public:
|
|||
ryw->debugLogRetries(e);
|
||||
|
||||
ryw->resetRyow();
|
||||
return Void();
|
||||
return Void();
|
||||
} catch( Error &e ) {
|
||||
if ( !ryw->resetPromise.isSet() ) {
|
||||
if(ryw->tr.apiVersionAtLeast(610)) {
|
||||
|
@ -2025,7 +2024,7 @@ void ReadYourWritesTransaction::setOptionImpl( FDBTransactionOptions::Option opt
|
|||
tr.setOption( option, value );
|
||||
}
|
||||
|
||||
void ReadYourWritesTransaction::operator=(ReadYourWritesTransaction&& r) BOOST_NOEXCEPT {
|
||||
void ReadYourWritesTransaction::operator=(ReadYourWritesTransaction&& r) noexcept {
|
||||
cache = std::move( r.cache );
|
||||
writes = std::move( r.writes );
|
||||
arena = std::move( r.arena );
|
||||
|
@ -2051,21 +2050,12 @@ void ReadYourWritesTransaction::operator=(ReadYourWritesTransaction&& r) BOOST_N
|
|||
versionStampKeys = std::move(r.versionStampKeys);
|
||||
}
|
||||
|
||||
ReadYourWritesTransaction::ReadYourWritesTransaction(ReadYourWritesTransaction&& r) BOOST_NOEXCEPT :
|
||||
cache( std::move(r.cache) ),
|
||||
writes( std::move(r.writes) ),
|
||||
arena( std::move(r.arena) ),
|
||||
reading( std::move(r.reading) ),
|
||||
retries( r.retries ),
|
||||
approximateSize(r.approximateSize),
|
||||
creationTime( r.creationTime ),
|
||||
deferredError( std::move(r.deferredError) ),
|
||||
timeoutActor( std::move(r.timeoutActor) ),
|
||||
resetPromise( std::move(r.resetPromise) ),
|
||||
commitStarted( r.commitStarted ),
|
||||
options( r.options ),
|
||||
transactionDebugInfo( r.transactionDebugInfo )
|
||||
{
|
||||
ReadYourWritesTransaction::ReadYourWritesTransaction(ReadYourWritesTransaction&& r) noexcept
|
||||
: cache(std::move(r.cache)), writes(std::move(r.writes)), arena(std::move(r.arena)), reading(std::move(r.reading)),
|
||||
retries(r.retries), approximateSize(r.approximateSize), creationTime(r.creationTime),
|
||||
deferredError(std::move(r.deferredError)), timeoutActor(std::move(r.timeoutActor)),
|
||||
resetPromise(std::move(r.resetPromise)), commitStarted(r.commitStarted), options(r.options),
|
||||
transactionDebugInfo(r.transactionDebugInfo) {
|
||||
cache.arena = &arena;
|
||||
writes.arena = &arena;
|
||||
tr = std::move( r.tr );
|
||||
|
|
|
@ -110,8 +110,8 @@ public:
|
|||
|
||||
// These are to permit use as state variables in actors:
|
||||
ReadYourWritesTransaction() : cache(&arena), writes(&arena) {}
|
||||
void operator=(ReadYourWritesTransaction&& r) BOOST_NOEXCEPT;
|
||||
ReadYourWritesTransaction(ReadYourWritesTransaction&& r) BOOST_NOEXCEPT;
|
||||
void operator=(ReadYourWritesTransaction&& r) noexcept;
|
||||
ReadYourWritesTransaction(ReadYourWritesTransaction&& r) noexcept;
|
||||
|
||||
virtual void addref() { ReferenceCounted<ReadYourWritesTransaction>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<ReadYourWritesTransaction>::delref(); }
|
||||
|
|
|
@ -292,8 +292,12 @@ public:
|
|||
entries.insert( Entry( allKeys.end, afterAllKeys, VectorRef<KeyValueRef>() ), NoMetric(), true );
|
||||
}
|
||||
// Visual Studio refuses to generate these, apparently despite the standard
|
||||
SnapshotCache(SnapshotCache&& r) BOOST_NOEXCEPT : entries(std::move(r.entries)), arena(r.arena) {}
|
||||
SnapshotCache& operator=(SnapshotCache&& r) BOOST_NOEXCEPT { entries = std::move(r.entries); arena = r.arena; return *this; }
|
||||
SnapshotCache(SnapshotCache&& r) noexcept : entries(std::move(r.entries)), arena(r.arena) {}
|
||||
SnapshotCache& operator=(SnapshotCache&& r) noexcept {
|
||||
entries = std::move(r.entries);
|
||||
arena = r.arena;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
// Returns true iff anything is known about the contents of the snapshot
|
||||
|
|
|
@ -72,7 +72,6 @@ struct StorageServerInterface {
|
|||
RequestStream<ReplyPromise<KeyValueStoreType>> getKeyValueStoreType;
|
||||
RequestStream<struct WatchValueRequest> watchValue;
|
||||
RequestStream<struct ReadHotSubRangeRequest> getReadHotRanges;
|
||||
|
||||
explicit StorageServerInterface(UID uid) : uniqueID( uid ) {}
|
||||
StorageServerInterface() : uniqueID( deterministicRandom()->randomUniqueID() ) {}
|
||||
NetworkAddress address() const { return getValue.getEndpoint().getPrimaryAddress(); }
|
||||
|
@ -157,18 +156,20 @@ struct ServerCacheInfo {
|
|||
struct GetValueReply : public LoadBalancedReply {
|
||||
constexpr static FileIdentifier file_identifier = 1378929;
|
||||
Optional<Value> value;
|
||||
bool cached;
|
||||
|
||||
GetValueReply() {}
|
||||
GetValueReply(Optional<Value> value) : value(value) {}
|
||||
GetValueReply() : cached(false) {}
|
||||
GetValueReply(Optional<Value> value, bool cached) : value(value), cached(cached) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, value);
|
||||
serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, value, cached);
|
||||
}
|
||||
};
|
||||
|
||||
struct GetValueRequest : TimedRequest {
|
||||
constexpr static FileIdentifier file_identifier = 8454530;
|
||||
SpanID spanContext;
|
||||
Key key;
|
||||
Version version;
|
||||
Optional<TagSet> tags;
|
||||
|
@ -176,11 +177,12 @@ struct GetValueRequest : TimedRequest {
|
|||
ReplyPromise<GetValueReply> reply;
|
||||
|
||||
GetValueRequest(){}
|
||||
GetValueRequest(const Key& key, Version ver, Optional<TagSet> tags, Optional<UID> debugID) : key(key), version(ver), tags(tags), debugID(debugID) {}
|
||||
|
||||
template <class Ar>
|
||||
GetValueRequest(SpanID spanContext, const Key& key, Version ver, Optional<TagSet> tags, Optional<UID> debugID)
|
||||
: spanContext(spanContext), key(key), version(ver), tags(tags), debugID(debugID) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, key, version, tags, debugID, reply);
|
||||
serializer(ar, key, version, tags, debugID, reply, spanContext);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -188,17 +190,19 @@ struct WatchValueReply {
|
|||
constexpr static FileIdentifier file_identifier = 3;
|
||||
|
||||
Version version;
|
||||
bool cached = false;
|
||||
WatchValueReply() = default;
|
||||
explicit WatchValueReply(Version version) : version(version) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, version);
|
||||
serializer(ar, version, cached);
|
||||
}
|
||||
};
|
||||
|
||||
struct WatchValueRequest {
|
||||
constexpr static FileIdentifier file_identifier = 14747733;
|
||||
SpanID spanContext;
|
||||
Key key;
|
||||
Optional<Value> value;
|
||||
Version version;
|
||||
|
@ -207,11 +211,13 @@ struct WatchValueRequest {
|
|||
ReplyPromise<WatchValueReply> reply;
|
||||
|
||||
WatchValueRequest(){}
|
||||
WatchValueRequest(const Key& key, Optional<Value> value, Version ver, Optional<TagSet> tags, Optional<UID> debugID) : key(key), value(value), version(ver), tags(tags), debugID(debugID) {}
|
||||
|
||||
template <class Ar>
|
||||
WatchValueRequest(SpanID spanContext, const Key& key, Optional<Value> value, Version ver, Optional<TagSet> tags,
|
||||
Optional<UID> debugID)
|
||||
: spanContext(spanContext), key(key), value(value), version(ver), tags(tags), debugID(debugID) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, key, value, version, tags, debugID, reply);
|
||||
serializer(ar, key, value, version, tags, debugID, reply, spanContext);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -221,18 +227,19 @@ struct GetKeyValuesReply : public LoadBalancedReply {
|
|||
VectorRef<KeyValueRef, VecSerStrategy::String> data;
|
||||
Version version; // useful when latestVersion was requested
|
||||
bool more;
|
||||
bool cached;
|
||||
bool cached = false;
|
||||
|
||||
GetKeyValuesReply() : version(invalidVersion), more(false), cached(false) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, data, version, more, arena);
|
||||
serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, data, version, more, cached, arena);
|
||||
}
|
||||
};
|
||||
|
||||
struct GetKeyValuesRequest : TimedRequest {
|
||||
constexpr static FileIdentifier file_identifier = 6795746;
|
||||
SpanID spanContext;
|
||||
Arena arena;
|
||||
KeySelectorRef begin, end;
|
||||
Version version; // or latestVersion
|
||||
|
@ -245,25 +252,27 @@ struct GetKeyValuesRequest : TimedRequest {
|
|||
GetKeyValuesRequest() : isFetchKeys(false) {}
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, begin, end, version, limit, limitBytes, isFetchKeys, tags, debugID, reply, arena);
|
||||
serializer(ar, begin, end, version, limit, limitBytes, isFetchKeys, tags, debugID, reply, spanContext, arena);
|
||||
}
|
||||
};
|
||||
|
||||
struct GetKeyReply : public LoadBalancedReply {
|
||||
constexpr static FileIdentifier file_identifier = 11226513;
|
||||
KeySelector sel;
|
||||
bool cached;
|
||||
|
||||
GetKeyReply() {}
|
||||
GetKeyReply(KeySelector sel) : sel(sel) {}
|
||||
GetKeyReply() : cached(false) {}
|
||||
GetKeyReply(KeySelector sel, bool cached) : sel(sel), cached(cached) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, sel);
|
||||
serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, sel, cached);
|
||||
}
|
||||
};
|
||||
|
||||
struct GetKeyRequest : TimedRequest {
|
||||
constexpr static FileIdentifier file_identifier = 10457870;
|
||||
SpanID spanContext;
|
||||
Arena arena;
|
||||
KeySelectorRef sel;
|
||||
Version version; // or latestVersion
|
||||
|
@ -272,11 +281,13 @@ struct GetKeyRequest : TimedRequest {
|
|||
ReplyPromise<GetKeyReply> reply;
|
||||
|
||||
GetKeyRequest() {}
|
||||
GetKeyRequest(KeySelectorRef const& sel, Version version, Optional<TagSet> tags, Optional<UID> debugID) : sel(sel), version(version), debugID(debugID) {}
|
||||
GetKeyRequest(SpanID spanContext, KeySelectorRef const& sel, Version version, Optional<TagSet> tags,
|
||||
Optional<UID> debugID)
|
||||
: spanContext(spanContext), sel(sel), version(version), debugID(debugID) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, sel, version, tags, debugID, reply, arena);
|
||||
serializer(ar, sel, version, tags, debugID, reply, spanContext, arena);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -19,10 +19,12 @@
|
|||
*/
|
||||
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/serialize.h"
|
||||
|
||||
const KeyRef systemKeysPrefix = LiteralStringRef("\xff");
|
||||
const KeyRangeRef normalKeys(KeyRef(), systemKeysPrefix);
|
||||
|
@ -200,6 +202,29 @@ const KeyRangeRef writeConflictRangeKeysRange =
|
|||
KeyRangeRef(LiteralStringRef("\xff\xff/transaction/write_conflict_range/"),
|
||||
LiteralStringRef("\xff\xff/transaction/write_conflict_range/\xff\xff"));
|
||||
|
||||
// "\xff/cacheServer/[[UID]] := StorageServerInterface"
|
||||
// This will be added by the cache server on initialization and removed by DD
|
||||
// TODO[mpilman]: We will need a way to map uint16_t ids to UIDs in a future
|
||||
// versions. For now caches simply cache everything so the ids
|
||||
// are not yet meaningful.
|
||||
const KeyRangeRef storageCacheServerKeys(LiteralStringRef("\xff/cacheServer/"),
|
||||
LiteralStringRef("\xff/cacheServer0"));
|
||||
const KeyRef storageCacheServersPrefix = storageCacheServerKeys.begin;
|
||||
const KeyRef storageCacheServersEnd = storageCacheServerKeys.end;
|
||||
|
||||
const Key storageCacheServerKey(UID id) {
|
||||
BinaryWriter wr(Unversioned());
|
||||
wr.serializeBytes(storageCacheServersPrefix);
|
||||
wr << id;
|
||||
return wr.toValue();
|
||||
}
|
||||
|
||||
const Value storageCacheServerValue(const StorageServerInterface& ssi) {
|
||||
BinaryWriter wr(IncludeVersion());
|
||||
wr << ssi;
|
||||
return wr.toValue();
|
||||
}
|
||||
|
||||
const KeyRangeRef ddStatsRange = KeyRangeRef(LiteralStringRef("\xff\xff/metrics/data_distribution_stats/"),
|
||||
LiteralStringRef("\xff\xff/metrics/data_distribution_stats/\xff\xff"));
|
||||
|
||||
|
@ -526,6 +551,7 @@ StorageServerInterface decodeServerListValue( ValueRef const& value ) {
|
|||
return s;
|
||||
}
|
||||
|
||||
|
||||
// processClassKeys.contains(k) iff k.startsWith( processClassKeys.begin ) because '/'+1 == '0'
|
||||
const KeyRangeRef processClassKeys(
|
||||
LiteralStringRef("\xff/processClass/"),
|
||||
|
|
|
@ -62,6 +62,12 @@ void decodeKeyServersValue( Standalone<RangeResultRef> result, const ValueRef& v
|
|||
void decodeKeyServersValue( std::map<Tag, UID> const& tag_uid, const ValueRef& value,
|
||||
std::vector<UID>& src, std::vector<UID>& dest );
|
||||
|
||||
// "\xff/storageCacheServer/[[UID]] := StorageServerInterface"
|
||||
extern const KeyRangeRef storageCacheServerKeys;
|
||||
extern const KeyRef storageCacheServersPrefix, storageCacheServersEnd;
|
||||
const Key storageCacheServerKey(UID id);
|
||||
const Value storageCacheServerValue(const StorageServerInterface& ssi);
|
||||
|
||||
// "\xff/storageCache/[[begin]]" := "[[vector<uint16_t>]]"
|
||||
extern const KeyRangeRef storageCacheKeys;
|
||||
extern const KeyRef storageCachePrefix;
|
||||
|
|
|
@ -326,12 +326,12 @@ ThreadFuture<Void> ThreadSafeTransaction::onError( Error const& e ) {
|
|||
return onMainThread( [tr, e](){ return tr->onError(e); } );
|
||||
}
|
||||
|
||||
void ThreadSafeTransaction::operator=(ThreadSafeTransaction&& r) BOOST_NOEXCEPT {
|
||||
void ThreadSafeTransaction::operator=(ThreadSafeTransaction&& r) noexcept {
|
||||
tr = r.tr;
|
||||
r.tr = NULL;
|
||||
}
|
||||
|
||||
ThreadSafeTransaction::ThreadSafeTransaction(ThreadSafeTransaction&& r) BOOST_NOEXCEPT {
|
||||
ThreadSafeTransaction::ThreadSafeTransaction(ThreadSafeTransaction&& r) noexcept {
|
||||
tr = r.tr;
|
||||
r.tr = NULL;
|
||||
}
|
||||
|
|
|
@ -97,8 +97,8 @@ public:
|
|||
|
||||
// These are to permit use as state variables in actors:
|
||||
ThreadSafeTransaction() : tr(NULL) {}
|
||||
void operator=(ThreadSafeTransaction&& r) BOOST_NOEXCEPT;
|
||||
ThreadSafeTransaction(ThreadSafeTransaction&& r) BOOST_NOEXCEPT;
|
||||
void operator=(ThreadSafeTransaction&& r) noexcept;
|
||||
ThreadSafeTransaction(ThreadSafeTransaction&& r) noexcept;
|
||||
|
||||
void reset() override;
|
||||
|
||||
|
|
|
@ -538,7 +538,8 @@ namespace PTreeImpl {
|
|||
return;
|
||||
}
|
||||
if (p->updated && p->lastUpdateVersion <= newOldestVersion) {
|
||||
/* If the node has been updated, figure out which pointer was repalced. And delete that pointer */
|
||||
/* If the node has been updated, figure out which pointer was replaced. And replace that pointer with the updated pointer.
|
||||
Then we can get rid of the updated child pointer and then make room in the node for future updates */
|
||||
auto which = p->replacedPointer;
|
||||
p->pointer[which] = p->pointer[2];
|
||||
p->updated = false;
|
||||
|
@ -611,9 +612,9 @@ public:
|
|||
VersionedMap() : oldestVersion(0), latestVersion(0) {
|
||||
roots.emplace_back(0, Tree());
|
||||
}
|
||||
VersionedMap( VersionedMap&& v ) BOOST_NOEXCEPT : oldestVersion(v.oldestVersion), latestVersion(v.latestVersion), roots(std::move(v.roots)) {
|
||||
}
|
||||
void operator = (VersionedMap && v) BOOST_NOEXCEPT {
|
||||
VersionedMap(VersionedMap&& v) noexcept
|
||||
: oldestVersion(v.oldestVersion), latestVersion(v.latestVersion), roots(std::move(v.roots)) {}
|
||||
void operator=(VersionedMap&& v) noexcept {
|
||||
oldestVersion = v.oldestVersion;
|
||||
latestVersion = v.latestVersion;
|
||||
roots = std::move(v.roots);
|
||||
|
|
|
@ -145,8 +145,17 @@ public:
|
|||
PTreeImpl::insert( writes, ver, WriteMapEntry( afterAllKeys, OperationStack(), false, false, false, false, false ) );
|
||||
}
|
||||
|
||||
WriteMap(WriteMap&& r) BOOST_NOEXCEPT : writeMapEmpty(r.writeMapEmpty), writes(std::move(r.writes)), ver(r.ver), scratch_iterator(std::move(r.scratch_iterator)), arena(r.arena) {}
|
||||
WriteMap& operator=(WriteMap&& r) BOOST_NOEXCEPT { writeMapEmpty = r.writeMapEmpty; writes = std::move(r.writes); ver = r.ver; scratch_iterator = std::move(r.scratch_iterator); arena = r.arena; return *this; }
|
||||
WriteMap(WriteMap&& r) noexcept
|
||||
: writeMapEmpty(r.writeMapEmpty), writes(std::move(r.writes)), ver(r.ver),
|
||||
scratch_iterator(std::move(r.scratch_iterator)), arena(r.arena) {}
|
||||
WriteMap& operator=(WriteMap&& r) noexcept {
|
||||
writeMapEmpty = r.writeMapEmpty;
|
||||
writes = std::move(r.writes);
|
||||
ver = r.ver;
|
||||
scratch_iterator = std::move(r.scratch_iterator);
|
||||
arena = r.arena;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//a write with addConflict false on top of an existing write with a conflict range will not remove the conflict
|
||||
void mutate( KeyRef key, MutationRef::Type operation, ValueRef param, bool addConflict ) {
|
||||
|
|
|
@ -268,6 +268,8 @@ description is not currently required but encouraged.
|
|||
description="Adds a tag to the transaction that can be used to apply manual targeted throttling. At most 5 tags can be set on a transaction." />
|
||||
<Option name="auto_throttle_tag" code="801" paramType="String" paramDescription="String identifier used to associated this transaction with a throttling group. Must not exceed 16 characters."
|
||||
description="Adds a tag to the transaction that can be used to apply manual or automatic targeted throttling. At most 5 tags can be set on a transaction." />
|
||||
<Option name="span_parent" code="900" paramType="Bytes" paramDescription="A byte string of length 16 used to associate the span of this transaction with a parent"
|
||||
description="Adds a parent to the Span of this transaction. Used for transaction tracing. A span can be identified with any 16 bytes"/>
|
||||
</Scope>
|
||||
|
||||
<!-- The enumeration values matter - do not change them without
|
||||
|
|
|
@ -132,7 +132,7 @@ struct OpenFileInfo : NonCopyable {
|
|||
Future<Reference<IAsyncFile>> opened; // Only valid until the file is fully opened
|
||||
|
||||
OpenFileInfo() : f(0) {}
|
||||
OpenFileInfo(OpenFileInfo && r) BOOST_NOEXCEPT : f(r.f), opened(std::move(r.opened)) { r.f = 0; }
|
||||
OpenFileInfo(OpenFileInfo&& r) noexcept : f(r.f), opened(std::move(r.opened)) { r.f = 0; }
|
||||
|
||||
Future<Reference<IAsyncFile>> get() {
|
||||
if (f) return Reference<IAsyncFile>::addRef(f);
|
||||
|
|
|
@ -22,6 +22,11 @@
|
|||
#define FLOW_MULTIINTERFACE_H
|
||||
#pragma once
|
||||
|
||||
#include "flow/FastRef.h"
|
||||
#include "fdbrpc/Locality.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
extern uint64_t debug_lastLoadBalanceResultEndpointToken;
|
||||
|
||||
template <class K, class V>
|
||||
|
@ -168,7 +173,7 @@ class MultiInterface : public ReferenceCounted<MultiInterface<T>> {
|
|||
template <class T>
|
||||
class MultiInterface<ReferencedInterface<T>> : public ReferenceCounted<MultiInterface<ReferencedInterface<T>>> {
|
||||
public:
|
||||
MultiInterface( const vector<Reference<ReferencedInterface<T>>>& v ) : alternatives(v), bestCount(0) {
|
||||
MultiInterface( const std::vector<Reference<ReferencedInterface<T>>>& v ) : alternatives(v), bestCount(0) {
|
||||
deterministicRandom()->randomShuffle(alternatives);
|
||||
if ( LBLocalityData<T>::Present ) {
|
||||
std::stable_sort( alternatives.begin(), alternatives.end(), ReferencedInterface<T>::sort_by_distance );
|
||||
|
@ -204,6 +209,18 @@ public:
|
|||
|
||||
T const& getInterface(int index) { return alternatives[index]->interf; }
|
||||
UID getId( int index ) const { return alternatives[index]->interf.id(); }
|
||||
bool hasInterface(UID id) const {
|
||||
for (const auto& ref : alternatives) {
|
||||
if (ref->interf.id() == id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference<ReferencedInterface<T>>& operator[](int i) { return alternatives[i]; }
|
||||
|
||||
const Reference<ReferencedInterface<T>>& operator[](int i) const { return alternatives[i]; }
|
||||
|
||||
virtual ~MultiInterface() {}
|
||||
|
||||
|
@ -211,7 +228,7 @@ public:
|
|||
return describe( alternatives );
|
||||
}
|
||||
private:
|
||||
vector<Reference<ReferencedInterface<T>>> alternatives;
|
||||
std::vector<Reference<ReferencedInterface<T>>> alternatives;
|
||||
int16_t bestCount;
|
||||
};
|
||||
|
||||
|
|
|
@ -150,7 +150,7 @@ public:
|
|||
void coalesce( const Range& k );
|
||||
void validateCoalesced();
|
||||
|
||||
void operator=(RangeMap&& r) BOOST_NOEXCEPT { map = std::move(r.map); }
|
||||
void operator=(RangeMap&& r) noexcept { map = std::move(r.map); }
|
||||
//void clear( const Val& value ) { ranges.clear(); ranges.insert(std::make_pair(Key(),value)); }
|
||||
|
||||
void insert( const Range& keys, const Val& value );
|
||||
|
|
|
@ -121,7 +121,7 @@ public:
|
|||
bool isValid() const { return sav != NULL; }
|
||||
ReplyPromise() : sav(new NetSAV<T>(0, 1)) {}
|
||||
ReplyPromise(const ReplyPromise& rhs) : sav(rhs.sav) { sav->addPromiseRef(); }
|
||||
ReplyPromise(ReplyPromise&& rhs) BOOST_NOEXCEPT : sav(rhs.sav) { rhs.sav = 0; }
|
||||
ReplyPromise(ReplyPromise&& rhs) noexcept : sav(rhs.sav) { rhs.sav = 0; }
|
||||
~ReplyPromise() { if (sav) sav->delPromiseRef(); }
|
||||
|
||||
ReplyPromise(const Endpoint& endpoint) : sav(new NetSAV<T>(0, 1, endpoint)) {}
|
||||
|
@ -132,7 +132,7 @@ public:
|
|||
if (sav) sav->delPromiseRef();
|
||||
sav = rhs.sav;
|
||||
}
|
||||
void operator=(ReplyPromise && rhs) BOOST_NOEXCEPT {
|
||||
void operator=(ReplyPromise&& rhs) noexcept {
|
||||
if (sav != rhs.sav) {
|
||||
if (sav) sav->delPromiseRef();
|
||||
sav = rhs.sav;
|
||||
|
@ -363,13 +363,13 @@ public:
|
|||
FutureStream<T> getFuture() const { queue->addFutureRef(); return FutureStream<T>(queue); }
|
||||
RequestStream() : queue(new NetNotifiedQueue<T>(0, 1)) {}
|
||||
RequestStream(const RequestStream& rhs) : queue(rhs.queue) { queue->addPromiseRef(); }
|
||||
RequestStream(RequestStream&& rhs) BOOST_NOEXCEPT : queue(rhs.queue) { rhs.queue = 0; }
|
||||
RequestStream(RequestStream&& rhs) noexcept : queue(rhs.queue) { rhs.queue = 0; }
|
||||
void operator=(const RequestStream& rhs) {
|
||||
rhs.queue->addPromiseRef();
|
||||
if (queue) queue->delPromiseRef();
|
||||
queue = rhs.queue;
|
||||
}
|
||||
void operator=(RequestStream&& rhs) BOOST_NOEXCEPT {
|
||||
void operator=(RequestStream&& rhs) noexcept {
|
||||
if (queue != rhs.queue) {
|
||||
if (queue) queue->delPromiseRef();
|
||||
queue = rhs.queue;
|
||||
|
|
|
@ -1630,10 +1630,18 @@ public:
|
|||
Promise<Void> action;
|
||||
Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Promise<Void>&& action ) : time(time), taskID(taskID), stable(stable), machine(machine), action(std::move(action)) {}
|
||||
Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Future<Void>& future ) : time(time), taskID(taskID), stable(stable), machine(machine) { future = action.getFuture(); }
|
||||
Task(Task&& rhs) BOOST_NOEXCEPT : time(rhs.time), taskID(rhs.taskID), stable(rhs.stable), machine(rhs.machine), action(std::move(rhs.action)) {}
|
||||
Task(Task&& rhs) noexcept
|
||||
: time(rhs.time), taskID(rhs.taskID), stable(rhs.stable), machine(rhs.machine),
|
||||
action(std::move(rhs.action)) {}
|
||||
void operator= ( Task const& rhs ) { taskID = rhs.taskID; time = rhs.time; stable = rhs.stable; machine = rhs.machine; action = rhs.action; }
|
||||
Task( Task const& rhs ) : taskID(rhs.taskID), time(rhs.time), stable(rhs.stable), machine(rhs.machine), action(rhs.action) {}
|
||||
void operator= (Task&& rhs) BOOST_NOEXCEPT { time = rhs.time; taskID = rhs.taskID; stable = rhs.stable; machine = rhs.machine; action = std::move(rhs.action); }
|
||||
void operator=(Task&& rhs) noexcept {
|
||||
time = rhs.time;
|
||||
taskID = rhs.taskID;
|
||||
stable = rhs.stable;
|
||||
machine = rhs.machine;
|
||||
action = std::move(rhs.action);
|
||||
}
|
||||
|
||||
bool operator < (Task const& rhs) const {
|
||||
// Ordering is reversed for priority_queue
|
||||
|
|
|
@ -144,7 +144,7 @@ void applyMetadataMutations(UID const& dbgid, Arena &arena, VectorRef<MutationRe
|
|||
{
|
||||
MutationRef privatized = m;
|
||||
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
|
||||
TraceEvent(SevDebug, "SendingPrivateMutation", dbgid).detail("Original", m.toString()).detail("Privatized", privatized.toString());
|
||||
//TraceEvent(SevDebug, "SendingPrivateMutation", dbgid).detail("Original", m.toString()).detail("Privatized", privatized.toString());
|
||||
cachedRangeInfo[k] = privatized;
|
||||
}
|
||||
if(k != allKeys.end) {
|
||||
|
@ -161,7 +161,7 @@ void applyMetadataMutations(UID const& dbgid, Arena &arena, VectorRef<MutationRe
|
|||
if(toCommit) {
|
||||
MutationRef privatized = m;
|
||||
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
|
||||
TraceEvent(SevDebug, "SendingPrivateMutation", dbgid).detail("Original", m.toString()).detail("Privatized", privatized.toString());
|
||||
//TraceEvent(SevDebug, "SendingPrivateMutation", dbgid).detail("Original", m.toString()).detail("Privatized", privatized.toString());
|
||||
toCommit->addTag( cacheTag );
|
||||
toCommit->addTypedMessage(privatized);
|
||||
}
|
||||
|
@ -276,6 +276,7 @@ void applyMetadataMutations(UID const& dbgid, Arena &arena, VectorRef<MutationRe
|
|||
allTags.insert(decodeServerTagValue(kv.value));
|
||||
}
|
||||
}
|
||||
allTags.insert(cacheTag);
|
||||
|
||||
if (m.param1 == lastEpochEndKey) {
|
||||
toCommit->addTags(allTags);
|
||||
|
@ -494,14 +495,24 @@ void applyMetadataMutations(UID const& dbgid, Arena &arena, VectorRef<MutationRe
|
|||
keyBegin = itr->first;
|
||||
mutationBegin = itr->second;
|
||||
++itr;
|
||||
keyEnd = itr->first;
|
||||
mutationEnd = itr->second;
|
||||
if (itr != cachedRangeInfo.end()) {
|
||||
keyEnd = itr->first;
|
||||
mutationEnd = itr->second;
|
||||
} else {
|
||||
//TraceEvent(SevDebug, "EndKeyNotFound", dbgid).detail("KeyBegin", keyBegin.toString());
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
keyEnd = itr->first;
|
||||
mutationEnd = itr->second;
|
||||
++itr;
|
||||
keyBegin = itr->first;
|
||||
mutationBegin = itr->second;
|
||||
if (itr != cachedRangeInfo.end()) {
|
||||
keyBegin = itr->first;
|
||||
mutationBegin = itr->second;
|
||||
} else {
|
||||
//TraceEvent(SevDebug, "BeginKeyNotFound", dbgid).detail("KeyEnd", keyEnd.toString());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now get all the storage server tags for the cached key-ranges
|
||||
|
|
|
@ -115,7 +115,7 @@ std::map<std::tuple<LogEpoch, Version, int>, std::map<Tag, Version>> BackupProgr
|
|||
// ASSERT(info.logRouterTags == epochTags[rit->first]);
|
||||
|
||||
updateTagVersions(&tagVersions, &tags, rit->second, info.epochEnd, adjustedBeginVersion, epoch);
|
||||
break;
|
||||
if (tags.empty()) break;
|
||||
}
|
||||
rit++;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "flow/Error.h"
|
||||
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/Tracing.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
#define SevDebugMemory SevVerbose
|
||||
|
@ -429,8 +431,9 @@ struct BackupData {
|
|||
}
|
||||
|
||||
ACTOR static Future<Version> _getMinKnownCommittedVersion(BackupData* self) {
|
||||
state Span span(deterministicRandom()->randomUniqueID(), "BA:GetMinCommittedVersion"_loc);
|
||||
loop {
|
||||
GetReadVersionRequest request(1, TransactionPriority::DEFAULT,
|
||||
GetReadVersionRequest request(span->context, 1, TransactionPriority::DEFAULT,
|
||||
GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION);
|
||||
choose {
|
||||
when(wait(self->cx->onMasterProxiesChanged())) {}
|
||||
|
|
|
@ -124,6 +124,7 @@ set(FDBSERVER_SRCS
|
|||
workloads/BackupToDBUpgrade.actor.cpp
|
||||
workloads/BulkLoad.actor.cpp
|
||||
workloads/BulkSetup.actor.h
|
||||
workloads/Cache.actor.cpp
|
||||
workloads/ChangeConfig.actor.cpp
|
||||
workloads/ClientTransactionProfileCorrectness.actor.cpp
|
||||
workloads/TriggerRecovery.actor.cpp
|
||||
|
|
|
@ -61,17 +61,17 @@ struct WorkerInfo : NonCopyable {
|
|||
WorkerDetails details;
|
||||
Future<Void> haltRatekeeper;
|
||||
Future<Void> haltDistributor;
|
||||
Optional<uint16_t> storageCacheInfo;
|
||||
Standalone<VectorRef<StringRef>> issues;
|
||||
|
||||
WorkerInfo() : gen(-1), reboots(0), priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
|
||||
WorkerInfo( Future<Void> watcher, ReplyPromise<RegisterWorkerReply> reply, Generation gen, WorkerInterface interf, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, bool degraded, Standalone<VectorRef<StringRef>> issues ) :
|
||||
watcher(watcher), reply(reply), gen(gen), reboots(0), initialClass(initialClass), priorityInfo(priorityInfo), details(interf, processClass, degraded), issues(issues) {}
|
||||
|
||||
WorkerInfo( WorkerInfo&& r ) BOOST_NOEXCEPT : watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen),
|
||||
reboots(r.reboots), initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)),
|
||||
haltRatekeeper(r.haltRatekeeper), haltDistributor(r.haltDistributor), storageCacheInfo(r.storageCacheInfo), issues(r.issues) {}
|
||||
void operator=( WorkerInfo&& r ) BOOST_NOEXCEPT {
|
||||
WorkerInfo(WorkerInfo&& r) noexcept
|
||||
: watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen), reboots(r.reboots),
|
||||
initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)),
|
||||
haltRatekeeper(r.haltRatekeeper), haltDistributor(r.haltDistributor), issues(r.issues) {}
|
||||
void operator=(WorkerInfo&& r) noexcept {
|
||||
watcher = std::move(r.watcher);
|
||||
reply = std::move(r.reply);
|
||||
gen = r.gen;
|
||||
|
@ -81,7 +81,6 @@ struct WorkerInfo : NonCopyable {
|
|||
details = std::move(r.details);
|
||||
haltRatekeeper = r.haltRatekeeper;
|
||||
haltDistributor = r.haltDistributor;
|
||||
storageCacheInfo = r.storageCacheInfo;
|
||||
issues = r.issues;
|
||||
}
|
||||
};
|
||||
|
@ -111,7 +110,6 @@ public:
|
|||
Database db;
|
||||
int unfinishedRecoveries;
|
||||
int logGenerations;
|
||||
std::map<uint16_t, std::pair<Optional<StorageServerInterface>, Optional<Key>>> cacheInterfaces;
|
||||
bool cachePopulated;
|
||||
std::map<NetworkAddress, std::pair<double, OpenDatabaseRequest>> clientStatus;
|
||||
|
||||
|
@ -138,28 +136,6 @@ public:
|
|||
serverInfo->set( newInfo );
|
||||
}
|
||||
|
||||
void setStorageCache(uint16_t id, const StorageServerInterface& interf) {
|
||||
auto newInfo = serverInfo->get();
|
||||
bool found = false;
|
||||
for(auto& it : newInfo.storageCaches) {
|
||||
if(it.first == id) {
|
||||
if(it.second != interf) {
|
||||
newInfo.id = deterministicRandom()->randomUniqueID();
|
||||
newInfo.infoGeneration = ++dbInfoCount;
|
||||
it.second = interf;
|
||||
}
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!found) {
|
||||
newInfo.id = deterministicRandom()->randomUniqueID();
|
||||
newInfo.infoGeneration = ++dbInfoCount;
|
||||
newInfo.storageCaches.push_back(std::make_pair(id, interf));
|
||||
}
|
||||
serverInfo->set( newInfo );
|
||||
}
|
||||
|
||||
void clearInterf(ProcessClass::ClassType t) {
|
||||
auto newInfo = serverInfo->get();
|
||||
newInfo.id = deterministicRandom()->randomUniqueID();
|
||||
|
@ -172,18 +148,6 @@ public:
|
|||
serverInfo->set( newInfo );
|
||||
}
|
||||
|
||||
void clearStorageCache(uint16_t id) {
|
||||
auto newInfo = serverInfo->get();
|
||||
for(auto it = newInfo.storageCaches.begin(); it != newInfo.storageCaches.end(); ++it) {
|
||||
if(it->first == id) {
|
||||
newInfo.id = deterministicRandom()->randomUniqueID();
|
||||
newInfo.infoGeneration = ++dbInfoCount;
|
||||
newInfo.storageCaches.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
serverInfo->set( newInfo );
|
||||
}
|
||||
};
|
||||
|
||||
struct UpdateWorkerList {
|
||||
|
@ -365,7 +329,7 @@ public:
|
|||
logServerMap->add(worker.interf.locality, &worker);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (logServerSet->size() < (addingDegraded == 0 ? desired : required)) {
|
||||
}
|
||||
else if (logServerSet->size() == required || logServerSet->size() <= desired) {
|
||||
|
@ -1441,7 +1405,6 @@ ACTOR Future<Void> clusterWatchDatabase( ClusterControllerData* cluster, Cluster
|
|||
dbInfo.clusterInterface = db->serverInfo->get().clusterInterface;
|
||||
dbInfo.distributor = db->serverInfo->get().distributor;
|
||||
dbInfo.ratekeeper = db->serverInfo->get().ratekeeper;
|
||||
dbInfo.storageCaches = db->serverInfo->get().storageCaches;
|
||||
dbInfo.latencyBandConfig = db->serverInfo->get().latencyBandConfig;
|
||||
|
||||
TraceEvent("CCWDB", cluster->id).detail("Lifetime", dbInfo.masterLifetime.toString()).detail("ChangeID", dbInfo.id);
|
||||
|
@ -1496,7 +1459,7 @@ ACTOR Future<Void> clusterOpenDatabase(ClusterControllerData::DBInfo* db, OpenDa
|
|||
if(db->clientStatus.size() > 10000) {
|
||||
TraceEvent(SevWarnAlways, "TooManyClientStatusEntries").suppressFor(1.0);
|
||||
}
|
||||
|
||||
|
||||
while (db->clientInfo->get().id == req.knownClientInfoID) {
|
||||
choose {
|
||||
when (wait( db->clientInfo->onChange() )) {}
|
||||
|
@ -1747,27 +1710,9 @@ ACTOR Future<Void> workerAvailabilityWatch( WorkerInterface worker, ProcessClass
|
|||
}
|
||||
when( wait( failed ) ) { // remove workers that have failed
|
||||
WorkerInfo& failedWorkerInfo = cluster->id_worker[ worker.locality.processId() ];
|
||||
if(failedWorkerInfo.storageCacheInfo.present()) {
|
||||
bool found = false;
|
||||
for(auto& it : cluster->id_worker) {
|
||||
if(!it.second.storageCacheInfo.present() && it.second.details.processClass == ProcessClass::StorageCacheClass) {
|
||||
found = true;
|
||||
it.second.storageCacheInfo = failedWorkerInfo.storageCacheInfo;
|
||||
cluster->db.cacheInterfaces[failedWorkerInfo.storageCacheInfo.get()] = std::make_pair(Optional<StorageServerInterface>(), it.first);
|
||||
if(!it.second.reply.isSet()) {
|
||||
it.second.reply.send( RegisterWorkerReply(it.second.details.processClass, it.second.priorityInfo, failedWorkerInfo.storageCacheInfo) );
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!found) {
|
||||
cluster->db.cacheInterfaces[failedWorkerInfo.storageCacheInfo.get()] = std::make_pair(Optional<StorageServerInterface>(), Optional<Key>());
|
||||
}
|
||||
cluster->db.clearStorageCache(failedWorkerInfo.storageCacheInfo.get());
|
||||
}
|
||||
|
||||
|
||||
if (!failedWorkerInfo.reply.isSet()) {
|
||||
failedWorkerInfo.reply.send( RegisterWorkerReply(failedWorkerInfo.details.processClass, failedWorkerInfo.priorityInfo, Optional<uint16_t>()) );
|
||||
failedWorkerInfo.reply.send( RegisterWorkerReply(failedWorkerInfo.details.processClass, failedWorkerInfo.priorityInfo) );
|
||||
}
|
||||
if (worker.locality.processId() == cluster->masterProcessId) {
|
||||
cluster->masterProcessId = Optional<Key>();
|
||||
|
@ -2055,7 +2000,7 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c
|
|||
if ( it.second.priorityInfo.isExcluded != isExcludedFromConfig ) {
|
||||
it.second.priorityInfo.isExcluded = isExcludedFromConfig;
|
||||
if( !it.second.reply.isSet() ) {
|
||||
it.second.reply.send( RegisterWorkerReply( it.second.details.processClass, it.second.priorityInfo, it.second.storageCacheInfo ) );
|
||||
it.second.reply.send( RegisterWorkerReply( it.second.details.processClass, it.second.priorityInfo ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2228,56 +2173,10 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
|
|||
}
|
||||
}
|
||||
}
|
||||
Optional<uint16_t> newStorageCache = req.storageCacheInterf.present() ? req.storageCacheInterf.get().first : Optional<uint16_t>();
|
||||
auto& cacheInfo = self->id_worker[w.locality.processId()].storageCacheInfo;
|
||||
if (req.storageCacheInterf.present()) {
|
||||
auto it = self->db.cacheInterfaces.find(req.storageCacheInterf.get().first);
|
||||
if(it == self->db.cacheInterfaces.end()) {
|
||||
if(self->db.cachePopulated) {
|
||||
if(cacheInfo.present()) {
|
||||
self->db.clearStorageCache(cacheInfo.get());
|
||||
}
|
||||
newStorageCache = Optional<uint16_t>();
|
||||
cacheInfo = Optional<uint16_t>();
|
||||
} else {
|
||||
self->db.setStorageCache(req.storageCacheInterf.get().first, req.storageCacheInterf.get().second);
|
||||
self->db.cacheInterfaces[req.storageCacheInterf.get().first] = std::make_pair(req.storageCacheInterf.get().second, w.locality.processId());
|
||||
cacheInfo = req.storageCacheInterf.get().first;
|
||||
}
|
||||
} else {
|
||||
if(!it->second.second.present() || (cacheInfo.present() && cacheInfo.get() == it->first) ) {
|
||||
self->db.setStorageCache(req.storageCacheInterf.get().first, req.storageCacheInterf.get().second);
|
||||
it->second = std::make_pair(req.storageCacheInterf.get().second, w.locality.processId());
|
||||
cacheInfo = req.storageCacheInterf.get().first;
|
||||
}
|
||||
else {
|
||||
if(cacheInfo.present()) {
|
||||
self->db.clearStorageCache(cacheInfo.get());
|
||||
}
|
||||
newStorageCache = Optional<uint16_t>();
|
||||
cacheInfo = Optional<uint16_t>();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
newStorageCache = cacheInfo;
|
||||
}
|
||||
|
||||
if(self->gotProcessClasses && newProcessClass == ProcessClass::StorageCacheClass && !newStorageCache.present()) {
|
||||
for(auto& it : self->db.cacheInterfaces) {
|
||||
if(!it.second.second.present()) {
|
||||
it.second.second = w.locality.processId();
|
||||
self->id_worker[w.locality.processId()].storageCacheInfo = it.first;
|
||||
newStorageCache = it.first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Notify the worker to register again with new process class/exclusive property
|
||||
if ( !req.reply.isSet() && ( newPriorityInfo != req.priorityInfo ||
|
||||
newStorageCache.present() != req.storageCacheInterf.present() ||
|
||||
(newStorageCache.present() && newStorageCache.get() != req.storageCacheInterf.get().first) ) ) {
|
||||
req.reply.send( RegisterWorkerReply(newProcessClass, newPriorityInfo, newStorageCache) );
|
||||
if ( !req.reply.isSet() && newPriorityInfo != req.priorityInfo ) {
|
||||
req.reply.send( RegisterWorkerReply(newProcessClass, newPriorityInfo) );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2504,7 +2403,7 @@ ACTOR Future<Void> monitorProcessClasses(ClusterControllerData *self) {
|
|||
w.second.details.processClass = newProcessClass;
|
||||
w.second.priorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
|
||||
if (!w.second.reply.isSet()) {
|
||||
w.second.reply.send( RegisterWorkerReply(w.second.details.processClass, w.second.priorityInfo, w.second.storageCacheInfo) );
|
||||
w.second.reply.send( RegisterWorkerReply(w.second.details.processClass, w.second.priorityInfo) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2558,81 +2457,7 @@ ACTOR Future<Void> monitorServerInfoConfig(ClusterControllerData::DBInfo* db) {
|
|||
break;
|
||||
}
|
||||
catch (Error &e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> monitorStorageCache(ClusterControllerData* self) {
|
||||
loop {
|
||||
state ReadYourWritesTransaction tr(self->db.db);
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
|
||||
|
||||
Optional<Value> changeVal = wait(tr.get(cacheChangeKey));
|
||||
Standalone<RangeResultRef> changeKeys = wait(tr.getRange(cacheChangeKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT( !changeKeys.more && changeKeys.size() < CLIENT_KNOBS->TOO_MANY );
|
||||
|
||||
std::set<uint16_t> changeIDs;
|
||||
for(auto& it : changeKeys) {
|
||||
changeIDs.insert(cacheChangeKeyDecodeIndex(it.key));
|
||||
}
|
||||
|
||||
for(auto& it : changeIDs) {
|
||||
if(!self->db.cacheInterfaces.count(it)) {
|
||||
self->db.cacheInterfaces[it] = std::make_pair(Optional<StorageServerInterface>(), Optional<Key>());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint16_t> removeIDs;
|
||||
for(auto& it : self->db.cacheInterfaces) {
|
||||
if(!changeIDs.count(it.first)) {
|
||||
removeIDs.push_back(it.first);
|
||||
if(it.second.second.present()) {
|
||||
self->id_worker[it.second.second.get()].storageCacheInfo = Optional<uint16_t>();
|
||||
}
|
||||
self->db.clearStorageCache(it.first);
|
||||
}
|
||||
}
|
||||
|
||||
for(auto& it : removeIDs) {
|
||||
self->db.cacheInterfaces.erase(it);
|
||||
}
|
||||
|
||||
for(auto& c : self->db.cacheInterfaces) {
|
||||
if(!c.second.second.present()) {
|
||||
bool found = false;
|
||||
for(auto& it : self->id_worker) {
|
||||
if(!it.second.storageCacheInfo.present() && it.second.details.processClass == ProcessClass::StorageCacheClass) {
|
||||
found = true;
|
||||
it.second.storageCacheInfo = c.first;
|
||||
c.second.second = it.first;
|
||||
if(!it.second.reply.isSet()) {
|
||||
it.second.reply.send( RegisterWorkerReply(it.second.details.processClass, it.second.priorityInfo, c.first) );
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!found) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
state Future<Void> configChangeFuture = tr.watch(cacheChangeKey);
|
||||
|
||||
self->db.cachePopulated = true;
|
||||
wait(tr.commit());
|
||||
wait(configChangeFuture);
|
||||
|
||||
break;
|
||||
}
|
||||
catch (Error &e) {
|
||||
wait(tr.onError(e));
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2688,7 +2513,7 @@ ACTOR Future<Void> updatedChangingDatacenters(ClusterControllerData *self) {
|
|||
if ( worker.priorityInfo.dcFitness > newFitness ) {
|
||||
worker.priorityInfo.dcFitness = newFitness;
|
||||
if(!worker.reply.isSet()) {
|
||||
worker.reply.send( RegisterWorkerReply( worker.details.processClass, worker.priorityInfo, worker.storageCacheInfo ) );
|
||||
worker.reply.send( RegisterWorkerReply( worker.details.processClass, worker.priorityInfo ) );
|
||||
}
|
||||
} else {
|
||||
state int currentFit = ProcessClass::BestFit;
|
||||
|
@ -2701,7 +2526,7 @@ ACTOR Future<Void> updatedChangingDatacenters(ClusterControllerData *self) {
|
|||
updated = true;
|
||||
it.second.priorityInfo.dcFitness = fitness;
|
||||
if(!it.second.reply.isSet()) {
|
||||
it.second.reply.send( RegisterWorkerReply( it.second.details.processClass, it.second.priorityInfo, it.second.storageCacheInfo ) );
|
||||
it.second.reply.send( RegisterWorkerReply( it.second.details.processClass, it.second.priorityInfo ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2740,7 +2565,7 @@ ACTOR Future<Void> updatedChangedDatacenters(ClusterControllerData *self) {
|
|||
if( worker.priorityInfo.dcFitness != newFitness ) {
|
||||
worker.priorityInfo.dcFitness = newFitness;
|
||||
if(!worker.reply.isSet()) {
|
||||
worker.reply.send( RegisterWorkerReply( worker.details.processClass, worker.priorityInfo, worker.storageCacheInfo ) );
|
||||
worker.reply.send( RegisterWorkerReply( worker.details.processClass, worker.priorityInfo ) );
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -2754,7 +2579,7 @@ ACTOR Future<Void> updatedChangedDatacenters(ClusterControllerData *self) {
|
|||
updated = true;
|
||||
it.second.priorityInfo.dcFitness = fitness;
|
||||
if(!it.second.reply.isSet()) {
|
||||
it.second.reply.send( RegisterWorkerReply( it.second.details.processClass, it.second.priorityInfo, it.second.storageCacheInfo ) );
|
||||
it.second.reply.send( RegisterWorkerReply( it.second.details.processClass, it.second.priorityInfo ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2908,7 +2733,7 @@ ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerDa
|
|||
if (self->onMasterIsBetter(worker, ProcessClass::DataDistributor)) {
|
||||
worker = self->id_worker[self->masterProcessId.get()].details;
|
||||
}
|
||||
|
||||
|
||||
InitializeDataDistributorRequest req(deterministicRandom()->randomUniqueID());
|
||||
TraceEvent("CCDataDistributorRecruit", self->id).detail("Addr", worker.interf.address());
|
||||
|
||||
|
@ -3091,7 +2916,6 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
|
|||
self.addActor.send( handleForcedRecoveries(&self, interf) );
|
||||
self.addActor.send( monitorDataDistributor(&self) );
|
||||
self.addActor.send( monitorRatekeeper(&self) );
|
||||
self.addActor.send( monitorStorageCache(&self) );
|
||||
self.addActor.send( dbInfoUpdater(&self) );
|
||||
self.addActor.send( traceCounters("ClusterControllerMetrics", self.id, SERVER_KNOBS->STORAGE_LOGGING_DELAY, &self.clusterControllerMetrics, self.id.toString() + "/ClusterControllerMetrics") );
|
||||
self.addActor.send( traceRole(Role::CLUSTER_CONTROLLER, interf.id()) );
|
||||
|
|
|
@ -20,6 +20,10 @@
|
|||
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
|
@ -35,9 +39,11 @@
|
|||
#include "fdbserver/TLogInterface.h"
|
||||
#include "fdbserver/WaitFailure.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/Trace.h"
|
||||
#include "flow/UnitTest.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
#include "flow/serialize.h"
|
||||
|
||||
class TCTeamInfo;
|
||||
struct TCMachineInfo;
|
||||
|
@ -4853,6 +4859,56 @@ ACTOR Future<Void> ddExclusionSafetyCheck(DistributorExclusionSafetyCheckRequest
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> waitFailCacheServer(Database* db, StorageServerInterface ssi) {
|
||||
state Transaction tr(*db);
|
||||
state Key key = storageCacheServerKey(ssi.id());
|
||||
wait(waitFailureClient(ssi.waitFailure));
|
||||
loop {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
tr.addReadConflictRange(storageCacheServerKeys);
|
||||
tr.clear(key);
|
||||
wait(tr.commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> cacheServerWatcher(Database* db) {
|
||||
state Transaction tr(*db);
|
||||
state ActorCollection actors(false);
|
||||
state std::set<UID> knownCaches;
|
||||
loop {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
Standalone<RangeResultRef> range = wait(tr.getRange(storageCacheServerKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!range.more);
|
||||
std::set<UID> caches;
|
||||
for (auto& kv : range) {
|
||||
UID id;
|
||||
BinaryReader reader{kv.key.removePrefix(storageCacheServersPrefix), Unversioned()};
|
||||
reader >> id;
|
||||
caches.insert(id);
|
||||
if (knownCaches.find(id) == knownCaches.end()) {
|
||||
StorageServerInterface ssi;
|
||||
BinaryReader reader{kv.value, IncludeVersion()};
|
||||
reader >> ssi;
|
||||
actors.add(waitFailCacheServer(db, ssi));
|
||||
}
|
||||
}
|
||||
knownCaches = std::move(caches);
|
||||
tr.reset();
|
||||
wait(delay(5.0) || actors.getResult());
|
||||
ASSERT(!actors.getResult().isReady());
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncVar<struct ServerDBInfo>> db ) {
|
||||
state Reference<DataDistributorData> self( new DataDistributorData(db, di.id()) );
|
||||
state Future<Void> collection = actorCollection( self->addActor.getFuture() );
|
||||
|
@ -4865,6 +4921,7 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
|
|||
try {
|
||||
TraceEvent("DataDistributorRunning", di.id());
|
||||
self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) );
|
||||
self->addActor.send(cacheServerWatcher(&cx));
|
||||
state Future<Void> distributor = reportErrorsExcept( dataDistribution(self, getShardMetricsList), "DataDistribution", di.id(), &normalDataDistributorErrors() );
|
||||
|
||||
loop choose {
|
||||
|
|
|
@ -1013,7 +1013,7 @@ private:
|
|||
ASSERT( nextPageSeq%sizeof(Page)==0 );
|
||||
|
||||
auto& p = backPage();
|
||||
memset(&p, 0, sizeof(Page)); // FIXME: unnecessary?
|
||||
memset(static_cast<void*>(&p), 0, sizeof(Page)); // FIXME: unnecessary?
|
||||
p.magic = 0xFDB;
|
||||
switch (diskQueueVersion) {
|
||||
case DiskQueueVersion::V0:
|
||||
|
|
|
@ -565,8 +565,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( DEGRADED_WARNING_RESET_DELAY, 7*24*60*60 );
|
||||
init( TRACE_LOG_FLUSH_FAILURE_CHECK_INTERVAL_SECONDS, 10 );
|
||||
init( TRACE_LOG_PING_TIMEOUT_SECONDS, 5.0 );
|
||||
init( MIN_DELAY_STORAGE_CANDIDACY_SECONDS, 10.0 );
|
||||
init( MAX_DELAY_STORAGE_CANDIDACY_SECONDS, 30.0 );
|
||||
init( MIN_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS, 10.0 );
|
||||
init( MAX_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS, 30.0 );
|
||||
init( DBINFO_FAILED_DELAY, 1.0 );
|
||||
|
||||
// Test harness
|
||||
|
@ -630,6 +630,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
|
||||
init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 );
|
||||
init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 );
|
||||
init( REDWOOD_COMMIT_CONCURRENT_READS, 64 );
|
||||
init( REDWOOD_PAGE_REBUILD_FILL_FACTOR, 0.66 );
|
||||
init( REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES, 10 );
|
||||
init( REDWOOD_LAZY_CLEAR_MIN_PAGES, 0 );
|
||||
|
|
|
@ -494,8 +494,8 @@ public:
|
|||
double DEGRADED_WARNING_RESET_DELAY;
|
||||
int64_t TRACE_LOG_FLUSH_FAILURE_CHECK_INTERVAL_SECONDS;
|
||||
double TRACE_LOG_PING_TIMEOUT_SECONDS;
|
||||
double MIN_DELAY_STORAGE_CANDIDACY_SECONDS; // Listen for a leader for N seconds, and if not heard, then try to become the leader.
|
||||
double MAX_DELAY_STORAGE_CANDIDACY_SECONDS;
|
||||
double MIN_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS; // Listen for a leader for N seconds, and if not heard, then try to become the leader.
|
||||
double MAX_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS;
|
||||
double DBINFO_FAILED_DELAY;
|
||||
|
||||
// Test harness
|
||||
|
@ -562,6 +562,7 @@ public:
|
|||
|
||||
int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files
|
||||
int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress.
|
||||
int REDWOOD_COMMIT_CONCURRENT_READS; // Max number of concurrent reads done to support commit operations
|
||||
double REDWOOD_PAGE_REBUILD_FILL_FACTOR; // When rebuilding pages, start a new page after this capacity
|
||||
int REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES; // Number of pages to try to pop from the lazy delete queue and process at once
|
||||
int REDWOOD_LAZY_CLEAR_MIN_PAGES; // Minimum number of pages to free before ending a lazy clear cycle, unless the queue is empty
|
||||
|
|
|
@ -42,8 +42,10 @@ struct LogRouterData {
|
|||
|
||||
TagData( Tag tag, Version popped, Version durableKnownCommittedVersion ) : tag(tag), popped(popped), durableKnownCommittedVersion(durableKnownCommittedVersion) {}
|
||||
|
||||
TagData(TagData&& r) BOOST_NOEXCEPT : version_messages(std::move(r.version_messages)), tag(r.tag), popped(r.popped), durableKnownCommittedVersion(r.durableKnownCommittedVersion) {}
|
||||
void operator= (TagData&& r) BOOST_NOEXCEPT {
|
||||
TagData(TagData&& r) noexcept
|
||||
: version_messages(std::move(r.version_messages)), tag(r.tag), popped(r.popped),
|
||||
durableKnownCommittedVersion(r.durableKnownCommittedVersion) {}
|
||||
void operator=(TagData&& r) noexcept {
|
||||
version_messages = std::move(r.version_messages);
|
||||
tag = r.tag;
|
||||
popped = r.popped;
|
||||
|
|
|
@ -738,10 +738,10 @@ void ILogSystem::SetPeekCursor::advanceTo(LogMessageVersion n) {
|
|||
|
||||
ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVersion startVersion, TaskPriority taskID) {
|
||||
loop {
|
||||
//TraceEvent("LPC_GetMore1", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag);
|
||||
//TraceEvent("LPC_GetMore1", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag.toString());
|
||||
if(self->bestServer >= 0 && self->bestSet >= 0 && self->serverCursors[self->bestSet][self->bestServer]->isActive()) {
|
||||
ASSERT(!self->serverCursors[self->bestSet][self->bestServer]->hasMessage());
|
||||
//TraceEvent("LPC_GetMore2", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag);
|
||||
//TraceEvent("LPC_GetMore2", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag.toString());
|
||||
wait( self->serverCursors[self->bestSet][self->bestServer]->getMore(taskID) || self->serverCursors[self->bestSet][self->bestServer]->onFailed() );
|
||||
self->useBestSet = true;
|
||||
} else {
|
||||
|
@ -778,7 +778,7 @@ ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVer
|
|||
} else {
|
||||
//FIXME: this will peeking way too many cursors when satellites exist, and does not need to peek bestSet cursors since we cannot get anymore data from them
|
||||
vector<Future<Void>> q;
|
||||
//TraceEvent("LPC_GetMore4", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag);
|
||||
//TraceEvent("LPC_GetMore4", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag.toString());
|
||||
for(auto& cursors : self->serverCursors) {
|
||||
for (auto& c :cursors) {
|
||||
if (!c->hasMessage()) {
|
||||
|
|
|
@ -163,18 +163,21 @@ struct GetCommitVersionReply {
|
|||
|
||||
struct GetCommitVersionRequest {
|
||||
constexpr static FileIdentifier file_identifier = 16683181;
|
||||
SpanID spanContext;
|
||||
uint64_t requestNum;
|
||||
uint64_t mostRecentProcessedRequestNum;
|
||||
UID requestingProxy;
|
||||
ReplyPromise<GetCommitVersionReply> reply;
|
||||
|
||||
GetCommitVersionRequest() { }
|
||||
GetCommitVersionRequest(uint64_t requestNum, uint64_t mostRecentProcessedRequestNum, UID requestingProxy)
|
||||
: requestNum(requestNum), mostRecentProcessedRequestNum(mostRecentProcessedRequestNum), requestingProxy(requestingProxy) {}
|
||||
GetCommitVersionRequest(SpanID spanContext, uint64_t requestNum, uint64_t mostRecentProcessedRequestNum,
|
||||
UID requestingProxy)
|
||||
: spanContext(spanContext), requestNum(requestNum), mostRecentProcessedRequestNum(mostRecentProcessedRequestNum),
|
||||
requestingProxy(requestingProxy) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, requestNum, mostRecentProcessedRequestNum, requestingProxy, reply);
|
||||
serializer(ar, requestNum, mostRecentProcessedRequestNum, requestingProxy, reply, spanContext);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -44,10 +44,13 @@
|
|||
#include "fdbserver/WaitFailure.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/Knobs.h"
|
||||
#include "flow/Stats.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/Tracing.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
#include <tuple>
|
||||
|
||||
ACTOR Future<Void> broadcastTxnRequest(TxnStateRequest req, int sendAmount, bool sendReply) {
|
||||
state ReplyPromise<Void> reply = req.reply;
|
||||
|
@ -287,9 +290,9 @@ ACTOR Future<Void> getRate(UID myID, Reference<AsyncVar<ServerDBInfo>> db, int64
|
|||
|
||||
ACTOR Future<Void> queueTransactionStartRequests(
|
||||
Reference<AsyncVar<ServerDBInfo>> db,
|
||||
Deque<GetReadVersionRequest> *systemQueue,
|
||||
Deque<GetReadVersionRequest> *defaultQueue,
|
||||
Deque<GetReadVersionRequest> *batchQueue,
|
||||
SpannedDeque<GetReadVersionRequest> *systemQueue,
|
||||
SpannedDeque<GetReadVersionRequest> *defaultQueue,
|
||||
SpannedDeque<GetReadVersionRequest> *batchQueue,
|
||||
FutureStream<GetReadVersionRequest> readVersionRequests,
|
||||
PromiseStream<Void> GRVTimer, double *lastGRVTime,
|
||||
double *GRVBatchTime, FutureStream<double> replyTimes,
|
||||
|
@ -326,9 +329,11 @@ ACTOR Future<Void> queueTransactionStartRequests(
|
|||
if (req.priority >= TransactionPriority::IMMEDIATE) {
|
||||
stats->txnSystemPriorityStartIn += req.transactionCount;
|
||||
systemQueue->push_back(req);
|
||||
systemQueue->span->parents.insert(req.spanContext);
|
||||
} else if (req.priority >= TransactionPriority::DEFAULT) {
|
||||
stats->txnDefaultPriorityStartIn += req.transactionCount;
|
||||
defaultQueue->push_back(req);
|
||||
defaultQueue->span->parents.insert(req.spanContext);
|
||||
} else {
|
||||
// Return error for batch_priority GRV requests
|
||||
int64_t proxiesCount = std::max((int)db->get().client.proxies.size(), 1);
|
||||
|
@ -340,6 +345,7 @@ ACTOR Future<Void> queueTransactionStartRequests(
|
|||
|
||||
stats->txnBatchPriorityStartIn += req.transactionCount;
|
||||
batchQueue->push_back(req);
|
||||
batchQueue->span->parents.insert(req.spanContext);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -505,8 +511,11 @@ struct ResolutionRequestBuilder {
|
|||
// [CommitTransactionRef_Index][Resolver_Index][Read_Conflict_Range_Index_on_Resolver]
|
||||
// -> read_conflict_range's original index in the commitTransactionRef
|
||||
|
||||
ResolutionRequestBuilder( ProxyCommitData* self, Version version, Version prevVersion, Version lastReceivedVersion) : self(self), requests(self->resolvers.size()) {
|
||||
for(auto& req : requests) {
|
||||
ResolutionRequestBuilder(ProxyCommitData* self, Version version, Version prevVersion, Version lastReceivedVersion,
|
||||
Span& parentSpan)
|
||||
: self(self), requests(self->resolvers.size()) {
|
||||
for (auto& req : requests) {
|
||||
req.spanContext = parentSpan->context;
|
||||
req.prevVersion = prevVersion;
|
||||
req.version = version;
|
||||
req.lastReceivedVersion = lastReceivedVersion;
|
||||
|
@ -790,6 +799,7 @@ ACTOR Future<Void> commitBatch(
|
|||
state Optional<UID> debugID;
|
||||
state bool forceRecovery = false;
|
||||
state int batchOperations = 0;
|
||||
state Span span("MP:commitBatch"_loc);
|
||||
int64_t batchBytes = 0;
|
||||
for (int t = 0; t<trs.size(); t++) {
|
||||
batchOperations += trs[t].transaction.mutations.size();
|
||||
|
@ -812,6 +822,7 @@ ACTOR Future<Void> commitBatch(
|
|||
debugID = nondeterministicRandom()->randomUniqueID();
|
||||
g_traceBatch.addAttach("CommitAttachID", trs[t].debugID.get().first(), debugID.get().first());
|
||||
}
|
||||
span->parents.insert(trs[t].spanContext);
|
||||
}
|
||||
|
||||
if(localBatchNumber == 2 && !debugID.present() && self->firstProxy && !g_network->isSimulated()) {
|
||||
|
@ -832,7 +843,7 @@ ACTOR Future<Void> commitBatch(
|
|||
if (debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.GettingCommitVersion");
|
||||
|
||||
GetCommitVersionRequest req(self->commitVersionRequestNumber++, self->mostRecentProcessedRequestNumber, self->dbgid);
|
||||
GetCommitVersionRequest req(span->context, self->commitVersionRequestNumber++, self->mostRecentProcessedRequestNumber, self->dbgid);
|
||||
GetCommitVersionReply versionReply = wait( brokenPromiseToNever(self->master.getCommitVersion.getReply(req, TaskPriority::ProxyMasterVersionReply)) );
|
||||
self->mostRecentProcessedRequestNumber = versionReply.requestNum;
|
||||
|
||||
|
@ -853,7 +864,7 @@ ACTOR Future<Void> commitBatch(
|
|||
if (debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.GotCommitVersion");
|
||||
|
||||
ResolutionRequestBuilder requests( self, commitVersion, prevVersion, self->version );
|
||||
ResolutionRequestBuilder requests( self, commitVersion, prevVersion, self->version, span );
|
||||
int conflictRangeCount = 0;
|
||||
state int64_t maxTransactionBytes = 0;
|
||||
for (int t = 0; t<trs.size(); t++) {
|
||||
|
@ -1166,27 +1177,32 @@ ACTOR Future<Void> commitBatch(
|
|||
// We prevent this by limiting the number of versions which are semi-committed but not fully committed to be less than the MVCC window
|
||||
if(self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
|
||||
computeDuration += g_network->timer() - computeStart;
|
||||
state Span waitVersionSpan;
|
||||
while (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
|
||||
// This should be *extremely* rare in the real world, but knob buggification should make it happen in simulation
|
||||
TEST(true); // Semi-committed pipeline limited by MVCC window
|
||||
//TraceEvent("ProxyWaitingForCommitted", self->dbgid).detail("CommittedVersion", self->committedVersion.get()).detail("NeedToCommit", commitVersion);
|
||||
waitVersionSpan = Span(deterministicRandom()->randomUniqueID(), "MP:overMaxReadTransactionLifeVersions"_loc, {span->context});
|
||||
choose{
|
||||
when(wait(self->committedVersion.whenAtLeast(commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS))) {
|
||||
wait(yield());
|
||||
break;
|
||||
}
|
||||
when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, TransactionPriority::IMMEDIATE, GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
|
||||
if(v.version > self->committedVersion.get()) {
|
||||
when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(
|
||||
GetReadVersionRequest(waitVersionSpan->context, 0, TransactionPriority::IMMEDIATE,
|
||||
GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
|
||||
if (v.version > self->committedVersion.get()) {
|
||||
self->locked = v.locked;
|
||||
self->metadataVersion = v.metadataVersion;
|
||||
self->committedVersion.set(v.version);
|
||||
}
|
||||
|
||||
|
||||
if (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)
|
||||
wait(delay(SERVER_KNOBS->PROXY_SPIN_DELAY));
|
||||
}
|
||||
}
|
||||
}
|
||||
waitVersionSpan = Span{};
|
||||
computeStart = g_network->timer();
|
||||
}
|
||||
|
||||
|
@ -1386,21 +1402,22 @@ ACTOR Future<Void> updateLastCommit(ProxyCommitData* self, Optional<UID> debugID
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(ProxyCommitData* commitData, uint32_t flags, vector<MasterProxyInterface> *otherProxies, Optional<UID> debugID,
|
||||
ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(Span parentSpan, ProxyCommitData* commitData, uint32_t flags, vector<MasterProxyInterface> *otherProxies, Optional<UID> debugID,
|
||||
int transactionCount, int systemTransactionCount, int defaultPriTransactionCount, int batchPriTransactionCount)
|
||||
{
|
||||
// Returns a version which (1) is committed, and (2) is >= the latest version reported committed (by a commit response) when this request was sent
|
||||
// (1) The version returned is the committedVersion of some proxy at some point before the request returns, so it is committed.
|
||||
// (2) No proxy on our list reported committed a higher version before this request was received, because then its committedVersion would have been higher,
|
||||
// and no other proxy could have already committed anything without first ending the epoch
|
||||
state Span span("MP:getLiveCommittedVersion"_loc, parentSpan);
|
||||
++commitData->stats.txnStartBatch;
|
||||
state vector<Future<GetReadVersionReply>> proxyVersions;
|
||||
state Future<GetReadVersionReply> replyFromMasterFuture;
|
||||
if (SERVER_KNOBS->ASK_READ_VERSION_FROM_MASTER) {
|
||||
replyFromMasterFuture = commitData->master.getLiveCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskPriority::ProxyMasterVersionReply);
|
||||
replyFromMasterFuture = commitData->master.getLiveCommittedVersion.getReply(GetRawCommittedVersionRequest(span->context, debugID), TaskPriority::ProxyMasterVersionReply);
|
||||
} else {
|
||||
for (auto const& p : *otherProxies)
|
||||
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskPriority::TLogConfirmRunningReply)));
|
||||
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(span->context, debugID), TaskPriority::TLogConfirmRunningReply)));
|
||||
}
|
||||
|
||||
if (!SERVER_KNOBS->ALWAYS_CAUSAL_READ_RISKY && !(flags&GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)) {
|
||||
|
@ -1508,15 +1525,16 @@ ACTOR static Future<Void> transactionStarter(
|
|||
state TransactionRateInfo normalRateInfo(10);
|
||||
state TransactionRateInfo batchRateInfo(0);
|
||||
|
||||
state Deque<GetReadVersionRequest> systemQueue;
|
||||
state Deque<GetReadVersionRequest> defaultQueue;
|
||||
state Deque<GetReadVersionRequest> batchQueue;
|
||||
state SpannedDeque<GetReadVersionRequest> systemQueue("MP:transactionStarterSystemQueue"_loc);
|
||||
state SpannedDeque<GetReadVersionRequest> defaultQueue("MP:transactionStarterDefaultQueue"_loc);
|
||||
state SpannedDeque<GetReadVersionRequest> batchQueue("MP:transactionStarterBatchQueue"_loc);
|
||||
state vector<MasterProxyInterface> otherProxies;
|
||||
|
||||
state TransactionTagMap<uint64_t> transactionTagCounter;
|
||||
state PrioritizedTransactionTagMap<ClientTagThrottleLimits> throttledTags;
|
||||
|
||||
state PromiseStream<double> replyTimes;
|
||||
state Span span;
|
||||
|
||||
addActor.send(getRate(proxy.id(), db, &transactionCount, &batchTransactionCount, &normalRateInfo, &batchRateInfo, healthMetricsReply, detailedHealthMetricsReply, &transactionTagCounter, &throttledTags));
|
||||
addActor.send(queueTransactionStartRequests(db, &systemQueue, &defaultQueue, &batchQueue, proxy.getConsistentReadVersion.getFuture(),
|
||||
|
@ -1558,13 +1576,16 @@ ACTOR static Future<Void> transactionStarter(
|
|||
int requestsToStart = 0;
|
||||
|
||||
while (requestsToStart < SERVER_KNOBS->START_TRANSACTION_MAX_REQUESTS_TO_START) {
|
||||
Deque<GetReadVersionRequest>* transactionQueue;
|
||||
SpannedDeque<GetReadVersionRequest>* transactionQueue;
|
||||
if(!systemQueue.empty()) {
|
||||
transactionQueue = &systemQueue;
|
||||
span = systemQueue.resetSpan();
|
||||
} else if(!defaultQueue.empty()) {
|
||||
transactionQueue = &defaultQueue;
|
||||
span = defaultQueue.resetSpan();
|
||||
} else if(!batchQueue.empty()) {
|
||||
transactionQueue = &batchQueue;
|
||||
span = batchQueue.resetSpan();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -1629,7 +1650,9 @@ ACTOR static Future<Void> transactionStarter(
|
|||
|
||||
for (int i = 0; i < start.size(); i++) {
|
||||
if (start[i].size()) {
|
||||
Future<GetReadVersionReply> readVersionReply = getLiveCommittedVersion(commitData, i, &otherProxies, debugID, transactionsStarted[i], systemTransactionsStarted[i], defaultPriTransactionsStarted[i], batchPriTransactionsStarted[i]);
|
||||
Future<GetReadVersionReply> readVersionReply = getLiveCommittedVersion(
|
||||
span, commitData, i, &otherProxies, debugID, transactionsStarted[i], systemTransactionsStarted[i],
|
||||
defaultPriTransactionsStarted[i], batchPriTransactionsStarted[i]);
|
||||
addActor.send(sendGrvReplies(readVersionReply, start[i], &commitData->stats,
|
||||
commitData->minKnownCommittedVersion, throttledTags));
|
||||
|
||||
|
@ -1639,6 +1662,7 @@ ACTOR static Future<Void> transactionStarter(
|
|||
}
|
||||
}
|
||||
}
|
||||
span.reset();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2097,6 +2121,7 @@ ACTOR Future<Void> masterProxyServerCore(
|
|||
}
|
||||
when(GetRawCommittedVersionRequest req = waitNext(proxy.getRawCommittedVersion.getFuture())) {
|
||||
//TraceEvent("ProxyGetRCV", proxy.id());
|
||||
Span span("MP:getRawCommittedReadVersion"_loc, { req.spanContext });
|
||||
if (req.debugID.present())
|
||||
g_traceBatch.addEvent("TransactionDebug", req.debugID.get().first(), "MasterProxyServer.masterProxyServerCore.GetRawCommittedVersion");
|
||||
GetReadVersionReply rep;
|
||||
|
|
|
@ -324,17 +324,19 @@ namespace oldTLog_4_6 {
|
|||
|
||||
TagData( Version popped, bool nothing_persistent, bool popped_recently, OldTag tag ) : nothing_persistent(nothing_persistent), popped(popped), popped_recently(popped_recently), update_version_sizes(tag != txsTagOld) {}
|
||||
|
||||
TagData(TagData&& r) BOOST_NOEXCEPT : version_messages(std::move(r.version_messages)), nothing_persistent(r.nothing_persistent), popped_recently(r.popped_recently), popped(r.popped), update_version_sizes(r.update_version_sizes) {}
|
||||
void operator= (TagData&& r) BOOST_NOEXCEPT {
|
||||
version_messages = std::move(r.version_messages);
|
||||
nothing_persistent = r.nothing_persistent;
|
||||
TagData(TagData&& r) noexcept
|
||||
: version_messages(std::move(r.version_messages)), nothing_persistent(r.nothing_persistent),
|
||||
popped_recently(r.popped_recently), popped(r.popped), update_version_sizes(r.update_version_sizes) {}
|
||||
void operator=(TagData&& r) noexcept {
|
||||
version_messages = std::move(r.version_messages);
|
||||
nothing_persistent = r.nothing_persistent;
|
||||
popped_recently = r.popped_recently;
|
||||
popped = r.popped;
|
||||
update_version_sizes = r.update_version_sizes;
|
||||
}
|
||||
}
|
||||
|
||||
// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
|
||||
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, int64_t* gBytesErased, Reference<LogData> tlogData, TaskPriority taskID ) {
|
||||
// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
|
||||
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, int64_t* gBytesErased, Reference<LogData> tlogData, TaskPriority taskID ) {
|
||||
while(!self->version_messages.empty() && self->version_messages.front().first < before) {
|
||||
Version version = self->version_messages.front().first;
|
||||
std::pair<int, int> &sizes = tlogData->version_sizes[version];
|
||||
|
|
|
@ -310,8 +310,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
|
||||
TagData( Tag tag, Version popped, bool nothingPersistent, bool poppedRecently, bool unpoppedRecovered ) : tag(tag), nothingPersistent(nothingPersistent), popped(popped), poppedRecently(poppedRecently), unpoppedRecovered(unpoppedRecovered) {}
|
||||
|
||||
TagData(TagData&& r) BOOST_NOEXCEPT : versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent), poppedRecently(r.poppedRecently), popped(r.popped), tag(r.tag), unpoppedRecovered(r.unpoppedRecovered) {}
|
||||
void operator= (TagData&& r) BOOST_NOEXCEPT {
|
||||
TagData(TagData&& r) noexcept
|
||||
: versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent),
|
||||
poppedRecently(r.poppedRecently), popped(r.popped), tag(r.tag), unpoppedRecovered(r.unpoppedRecovered) {}
|
||||
void operator=(TagData&& r) noexcept {
|
||||
versionMessages = std::move(r.versionMessages);
|
||||
nothingPersistent = r.nothingPersistent;
|
||||
poppedRecently = r.poppedRecently;
|
||||
|
|
|
@ -375,8 +375,12 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
|
||||
TagData( Tag tag, Version popped, IDiskQueue::location poppedLocation, bool nothingPersistent, bool poppedRecently, bool unpoppedRecovered ) : tag(tag), nothingPersistent(nothingPersistent), poppedRecently(poppedRecently), popped(popped), persistentPopped(0), versionForPoppedLocation(0), poppedLocation(poppedLocation), unpoppedRecovered(unpoppedRecovered) {}
|
||||
|
||||
TagData(TagData&& r) BOOST_NOEXCEPT : versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent), poppedRecently(r.poppedRecently), popped(r.popped), persistentPopped(r.persistentPopped), versionForPoppedLocation(r.versionForPoppedLocation), poppedLocation(r.poppedLocation), tag(r.tag), unpoppedRecovered(r.unpoppedRecovered) {}
|
||||
void operator= (TagData&& r) BOOST_NOEXCEPT {
|
||||
TagData(TagData&& r) noexcept
|
||||
: versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent),
|
||||
poppedRecently(r.poppedRecently), popped(r.popped), persistentPopped(r.persistentPopped),
|
||||
versionForPoppedLocation(r.versionForPoppedLocation), poppedLocation(r.poppedLocation), tag(r.tag),
|
||||
unpoppedRecovered(r.unpoppedRecovered) {}
|
||||
void operator=(TagData&& r) noexcept {
|
||||
versionMessages = std::move(r.versionMessages);
|
||||
nothingPersistent = r.nothingPersistent;
|
||||
poppedRecently = r.poppedRecently;
|
||||
|
|
|
@ -20,9 +20,15 @@
|
|||
|
||||
#ifndef FDBSERVER_RESOLVERINTERFACE_H
|
||||
#define FDBSERVER_RESOLVERINTERFACE_H
|
||||
#include "fdbclient/CommitTransaction.h"
|
||||
#include "fdbrpc/Locality.h"
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
#pragma once
|
||||
|
||||
#include "fdbrpc/Locality.h"
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/CommitTransaction.h"
|
||||
|
||||
struct ResolverInterface {
|
||||
constexpr static FileIdentifier file_identifier = 1755944;
|
||||
|
@ -91,17 +97,19 @@ struct ResolveTransactionBatchRequest {
|
|||
constexpr static FileIdentifier file_identifier = 16462858;
|
||||
Arena arena;
|
||||
|
||||
SpanID spanContext;
|
||||
Version prevVersion;
|
||||
Version version; // FIXME: ?
|
||||
Version lastReceivedVersion;
|
||||
VectorRef<CommitTransactionRef> transactions;
|
||||
VectorRef<struct CommitTransactionRef> transactions;
|
||||
VectorRef<int> txnStateTransactions; // Offsets of elements of transactions that have (transaction subsystem state) mutations
|
||||
ReplyPromise<ResolveTransactionBatchReply> reply;
|
||||
Optional<UID> debugID;
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar) {
|
||||
serializer(ar, prevVersion, version, lastReceivedVersion, transactions, txnStateTransactions, reply, arena, debugID);
|
||||
serializer(ar, prevVersion, version, lastReceivedVersion, transactions, txnStateTransactions, reply, arena,
|
||||
debugID, spanContext);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -83,6 +83,7 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
|
|||
updateProcessStats(self);
|
||||
updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL);
|
||||
}
|
||||
when(wait(actors.getResult())) {}
|
||||
when(wait(exitRole)) {
|
||||
TraceEvent("RestoreApplierCoreExitRole", self->id());
|
||||
break;
|
||||
|
@ -92,6 +93,7 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
|
|||
TraceEvent(SevWarn, "FastRestoreApplierError", self->id())
|
||||
.detail("RequestType", requestTypeStr)
|
||||
.error(e, true);
|
||||
actors.clear(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -179,7 +181,6 @@ ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRange
|
|||
.detail("DelayTime", delayTime);
|
||||
loop {
|
||||
try {
|
||||
tr->reset();
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
for (auto& range : ranges) {
|
||||
|
@ -216,47 +217,50 @@ ACTOR static Future<Void> getAndComputeStagingKeys(
|
|||
std::map<Key, std::map<Key, StagingKey>::iterator> incompleteStagingKeys, double delayTime, Database cx,
|
||||
UID applierID, int batchIndex) {
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
state std::vector<Future<Optional<Value>>> fValues;
|
||||
state std::vector<Future<Optional<Value>>> fValues(incompleteStagingKeys.size(), Never());
|
||||
state int retries = 0;
|
||||
state UID randomID = deterministicRandom()->randomUniqueID();
|
||||
|
||||
wait(delay(delayTime + deterministicRandom()->random01() * delayTime));
|
||||
TraceEvent("FastRestoreApplierGetAndComputeStagingKeysStart", applierID)
|
||||
.detail("RandomUID", randomID)
|
||||
.detail("BatchIndex", batchIndex)
|
||||
.detail("GetKeys", incompleteStagingKeys.size())
|
||||
.detail("DelayTime", delayTime);
|
||||
|
||||
loop {
|
||||
try {
|
||||
tr->reset();
|
||||
int i = 0;
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
for (auto& key : incompleteStagingKeys) {
|
||||
fValues.push_back(tr->get(key.first));
|
||||
fValues[i++] = tr->get(key.first);
|
||||
}
|
||||
wait(waitForAll(fValues));
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
if (retries++ > 10) { // TODO: Can we stop retry at the first error?
|
||||
TraceEvent(SevWarn, "FastRestoreApplierGetAndComputeStagingKeysGetKeysStuck", applierID)
|
||||
if (retries++ > incompleteStagingKeys.size()) {
|
||||
TraceEvent(SevWarnAlways, "GetAndComputeStagingKeys", applierID)
|
||||
.suppressFor(1.0)
|
||||
.detail("RandomUID", randomID)
|
||||
.detail("BatchIndex", batchIndex)
|
||||
.detail("GetKeys", incompleteStagingKeys.size())
|
||||
.error(e);
|
||||
break;
|
||||
}
|
||||
wait(tr->onError(e));
|
||||
fValues.clear();
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(fValues.size() == incompleteStagingKeys.size());
|
||||
int i = 0;
|
||||
for (auto& key : incompleteStagingKeys) {
|
||||
if (!fValues[i].get().present()) { // Debug info to understand which key does not exist in DB
|
||||
if (!fValues[i].get().present()) { // Key not exist in DB
|
||||
// if condition: fValues[i].Valid() && fValues[i].isReady() && !fValues[i].isError() &&
|
||||
TraceEvent(SevWarn, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB", applierID)
|
||||
.detail("BatchIndex", batchIndex)
|
||||
.detail("Key", key.first)
|
||||
.detail("Reason", "Not found in DB")
|
||||
.detail("IsReady", fValues[i].isReady())
|
||||
.detail("PendingMutations", key.second->second.pendingMutations.size())
|
||||
.detail("StagingKeyType", (int)key.second->second.type);
|
||||
.detail("StagingKeyType", getTypeString(key.second->second.type));
|
||||
for (auto& vm : key.second->second.pendingMutations) {
|
||||
TraceEvent(SevWarn, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB")
|
||||
.detail("PendingMutationVersion", vm.first.toString())
|
||||
|
@ -274,8 +278,10 @@ ACTOR static Future<Void> getAndComputeStagingKeys(
|
|||
}
|
||||
|
||||
TraceEvent("FastRestoreApplierGetAndComputeStagingKeysDone", applierID)
|
||||
.detail("RandomUID", randomID)
|
||||
.detail("BatchIndex", batchIndex)
|
||||
.detail("GetKeys", incompleteStagingKeys.size());
|
||||
.detail("GetKeys", incompleteStagingKeys.size())
|
||||
.detail("DelayTime", delayTime);
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
@ -404,7 +410,6 @@ ACTOR static Future<Void> applyStagingKeysBatch(std::map<Key, StagingKey>::itera
|
|||
TraceEvent("FastRestoreApplierPhaseApplyStagingKeysBatch", applierID).detail("Begin", begin->first);
|
||||
loop {
|
||||
try {
|
||||
tr->reset();
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
std::map<Key, StagingKey>::iterator iter = begin;
|
||||
|
@ -502,6 +507,7 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
|
|||
.detail("FinishedBatch", self->finishedBatch.get());
|
||||
|
||||
// Ensure batch (i-1) is applied before batch i
|
||||
// TODO: Add a counter to warn when too many requests are waiting on the actor
|
||||
wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1));
|
||||
|
||||
state bool isDuplicated = true;
|
||||
|
@ -523,6 +529,8 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
|
|||
}
|
||||
|
||||
ASSERT(batchData->dbApplier.present());
|
||||
ASSERT(!batchData->dbApplier.get().isError()); // writeMutationsToDB actor cannot have error.
|
||||
// We cannot blindly retry because it is not idempodent
|
||||
|
||||
wait(batchData->dbApplier.get());
|
||||
|
||||
|
@ -578,4 +586,4 @@ Value applyAtomicOp(Optional<StringRef> existingValue, Value value, MutationRef:
|
|||
ASSERT(false);
|
||||
}
|
||||
return Value();
|
||||
}
|
||||
}
|
|
@ -123,7 +123,8 @@ struct StagingKey {
|
|||
.detail("Value", val)
|
||||
.detail("MType", type < MutationRef::MAX_ATOMIC_OP ? getTypeString(type) : "[Unset]")
|
||||
.detail("LargestPendingVersion",
|
||||
(pendingMutations.empty() ? "[none]" : pendingMutations.rbegin()->first.toString()));
|
||||
(pendingMutations.empty() ? "[none]" : pendingMutations.rbegin()->first.toString()))
|
||||
.detail("PendingMutations", pendingMutations.size());
|
||||
std::map<LogMessageVersion, Standalone<MutationRef>>::iterator lb = pendingMutations.lower_bound(version);
|
||||
if (lb == pendingMutations.end()) {
|
||||
return;
|
||||
|
|
|
@ -122,7 +122,7 @@ Future<Void> RestoreConfigFR::logError(Database cx, Error e, std::string const&
|
|||
}
|
||||
TraceEvent t(SevWarn, "FileRestoreError");
|
||||
t.error(e).detail("RestoreUID", uid).detail("Description", details).detail("TaskInstance", (uint64_t)taskInstance);
|
||||
// These should not happen
|
||||
// key_not_found could happen
|
||||
if (e.code() == error_code_key_not_found) t.backtrace();
|
||||
|
||||
return updateErrorInfo(cx, e, details);
|
||||
|
|
|
@ -111,13 +111,17 @@ ACTOR Future<Void> restoreLoaderCore(RestoreLoaderInterface loaderInterf, int no
|
|||
updateProcessStats(self);
|
||||
updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL);
|
||||
}
|
||||
when(wait(actors.getResult())) {}
|
||||
when(wait(exitRole)) {
|
||||
TraceEvent("FastRestoreLoaderCoreExitRole", self->id());
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarn, "FastRestoreLoader", self->id()).detail("RequestType", requestTypeStr).error(e, true);
|
||||
TraceEvent(SevWarn, "FastRestoreLoaderError", self->id())
|
||||
.detail("RequestType", requestTypeStr)
|
||||
.error(e, true);
|
||||
actors.clear(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -198,17 +198,25 @@ ACTOR Future<Void> startProcessRestoreRequests(Reference<RestoreMasterData> self
|
|||
|
||||
TraceEvent("FastRestoreMasterWaitOnRestoreRequests", self->id()).detail("RestoreRequests", restoreRequests.size());
|
||||
|
||||
// DB has been locked where restore request is submitted
|
||||
wait(clearDB(cx));
|
||||
// TODO: Sanity check restoreRequests' key ranges do not overlap
|
||||
|
||||
// Step: Perform the restore requests
|
||||
try {
|
||||
for (restoreIndex = 0; restoreIndex < restoreRequests.size(); restoreIndex++) {
|
||||
RestoreRequest& request = restoreRequests[restoreIndex];
|
||||
state RestoreRequest request = restoreRequests[restoreIndex];
|
||||
TraceEvent("FastRestoreMasterProcessRestoreRequests", self->id())
|
||||
.detail("RestoreRequestInfo", request.toString());
|
||||
// TODO: Initialize MasterData and all loaders and appliers' data for each restore request!
|
||||
self->resetPerRestoreRequest();
|
||||
|
||||
// clear the key range that will be restored
|
||||
wait(runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr->clear(request.range);
|
||||
return Void();
|
||||
}));
|
||||
|
||||
wait(success(processRestoreRequest(self, cx, request)));
|
||||
wait(notifyRestoreCompleted(self, false));
|
||||
}
|
||||
|
@ -637,7 +645,6 @@ ACTOR static Future<Standalone<VectorRef<RestoreRequest>>> collectRestoreRequest
|
|||
loop {
|
||||
try {
|
||||
TraceEvent("FastRestoreMasterPhaseCollectRestoreRequestsWait");
|
||||
tr.reset();
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
|
||||
|
@ -866,6 +873,7 @@ ACTOR static Future<Void> notifyApplierToApplyMutations(Reference<MasterBatchDat
|
|||
}
|
||||
|
||||
ASSERT(batchData->applyToDB.present());
|
||||
ASSERT(!batchData->applyToDB.get().isError());
|
||||
wait(batchData->applyToDB.get());
|
||||
|
||||
// Sanity check all appliers have applied data to destination DB
|
||||
|
@ -943,7 +951,7 @@ ACTOR static Future<Void> notifyRestoreCompleted(Reference<RestoreMasterData> se
|
|||
ACTOR static Future<Void> signalRestoreCompleted(Reference<RestoreMasterData> self, Database cx) {
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
|
||||
wait(notifyRestoreCompleted(self, true));
|
||||
wait(notifyRestoreCompleted(self, true)); // notify workers the restore has completed
|
||||
|
||||
wait(delay(5.0)); // Give some time for loaders and appliers to exit
|
||||
|
||||
|
|
|
@ -249,7 +249,6 @@ ACTOR Future<Void> startRestoreWorker(Reference<RestoreWorkerData> self, Restore
|
|||
}
|
||||
|
||||
ACTOR static Future<Void> waitOnRestoreRequests(Database cx, UID nodeID = UID()) {
|
||||
state Future<Void> watch4RestoreRequest;
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||
state Optional<Value> numRequests;
|
||||
|
||||
|
@ -263,10 +262,10 @@ ACTOR static Future<Void> waitOnRestoreRequests(Database cx, UID nodeID = UID())
|
|||
Optional<Value> _numRequests = wait(tr.get(restoreRequestTriggerKey));
|
||||
numRequests = _numRequests;
|
||||
if (!numRequests.present()) {
|
||||
watch4RestoreRequest = tr.watch(restoreRequestTriggerKey);
|
||||
state Future<Void> watchForRestoreRequest = tr.watch(restoreRequestTriggerKey);
|
||||
wait(tr.commit());
|
||||
TraceEvent(SevInfo, "FastRestoreWaitOnRestoreRequestTriggerKey", nodeID);
|
||||
wait(watch4RestoreRequest);
|
||||
wait(watchForRestoreRequest);
|
||||
TraceEvent(SevInfo, "FastRestoreDetectRestoreRequestTriggerKeyChanged", nodeID);
|
||||
} else {
|
||||
TraceEvent(SevInfo, "FastRestoreRestoreRequestTriggerKey", nodeID)
|
||||
|
|
|
@ -55,7 +55,6 @@ struct ServerDBInfo {
|
|||
LogSystemConfig logSystemConfig;
|
||||
std::vector<UID> priorCommittedLogServers; // If !fullyRecovered and logSystemConfig refers to a new log system which may not have been committed to the coordinated state yet, then priorCommittedLogServers are the previous, fully committed generation which need to stay alive in case this recovery fails
|
||||
Optional<LatencyBandConfig> latencyBandConfig;
|
||||
std::vector<std::pair<uint16_t,StorageServerInterface>> storageCaches;
|
||||
int64_t infoGeneration;
|
||||
|
||||
ServerDBInfo() : recoveryCount(0), recoveryState(RecoveryState::UNINITIALIZED), logSystemConfig(0), infoGeneration(0) {}
|
||||
|
@ -65,7 +64,7 @@ struct ServerDBInfo {
|
|||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, id, clusterInterface, client, distributor, master, ratekeeper, resolvers, recoveryCount, recoveryState, masterLifetime, logSystemConfig, priorCommittedLogServers, latencyBandConfig, storageCaches, infoGeneration);
|
||||
serializer(ar, id, clusterInterface, client, distributor, master, ratekeeper, resolvers, recoveryCount, recoveryState, masterLifetime, logSystemConfig, priorCommittedLogServers, latencyBandConfig, infoGeneration);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1220,9 +1220,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
|
||||
//FIXME: temporarily code to test storage cache
|
||||
//TODO: caching disabled for this merge
|
||||
//if(dc==0) {
|
||||
// machines++;
|
||||
//}
|
||||
if(dc==0) {
|
||||
machines++;
|
||||
}
|
||||
|
||||
int useSeedForMachine = deterministicRandom()->randomInt(0, machines);
|
||||
Standalone<StringRef> zoneId;
|
||||
|
@ -1249,10 +1249,10 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
|
||||
//FIXME: temporarily code to test storage cache
|
||||
//TODO: caching disabled for this merge
|
||||
//if(machine==machines-1 && dc==0) {
|
||||
// processClass = ProcessClass(ProcessClass::StorageCacheClass, ProcessClass::CommandLineSource);
|
||||
// nonVersatileMachines++;
|
||||
//}
|
||||
if(machine==machines-1 && dc==0) {
|
||||
processClass = ProcessClass(ProcessClass::StorageCacheClass, ProcessClass::CommandLineSource);
|
||||
nonVersatileMachines++;
|
||||
}
|
||||
|
||||
std::vector<IPAddress> ips;
|
||||
for (int i = 0; i < processesPerMachine; i++) {
|
||||
|
|
|
@ -393,8 +393,8 @@ public:
|
|||
}
|
||||
}
|
||||
~SkipList() { destroy(); }
|
||||
SkipList(SkipList&& other) BOOST_NOEXCEPT : header(other.header) { other.header = NULL; }
|
||||
void operator=(SkipList&& other) BOOST_NOEXCEPT {
|
||||
SkipList(SkipList&& other) noexcept : header(other.header) { other.header = NULL; }
|
||||
void operator=(SkipList&& other) noexcept {
|
||||
destroy();
|
||||
header = other.header;
|
||||
other.header = NULL;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,6 +28,7 @@
|
|||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "fdbserver/LogProtocolMessage.h"
|
||||
#include "fdbserver/TLogInterface.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/IKeyValueStore.h"
|
||||
|
@ -373,8 +374,12 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
|
||||
TagData( Tag tag, Version popped, IDiskQueue::location poppedLocation, bool nothingPersistent, bool poppedRecently, bool unpoppedRecovered ) : tag(tag), nothingPersistent(nothingPersistent), poppedRecently(poppedRecently), popped(popped), persistentPopped(0), versionForPoppedLocation(0), poppedLocation(poppedLocation), unpoppedRecovered(unpoppedRecovered) {}
|
||||
|
||||
TagData(TagData&& r) BOOST_NOEXCEPT : versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent), poppedRecently(r.poppedRecently), popped(r.popped), persistentPopped(r.persistentPopped), versionForPoppedLocation(r.versionForPoppedLocation), poppedLocation(r.poppedLocation), tag(r.tag), unpoppedRecovered(r.unpoppedRecovered) {}
|
||||
void operator= (TagData&& r) BOOST_NOEXCEPT {
|
||||
TagData(TagData&& r) noexcept
|
||||
: versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent),
|
||||
poppedRecently(r.poppedRecently), popped(r.popped), persistentPopped(r.persistentPopped),
|
||||
versionForPoppedLocation(r.versionForPoppedLocation), poppedLocation(r.poppedLocation), tag(r.tag),
|
||||
unpoppedRecovered(r.unpoppedRecovered) {}
|
||||
void operator=(TagData&& r) noexcept {
|
||||
versionMessages = std::move(r.versionMessages);
|
||||
nothingPersistent = r.nothingPersistent;
|
||||
poppedRecently = r.poppedRecently;
|
||||
|
@ -1677,7 +1682,10 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
reply.end = endVersion;
|
||||
reply.onlySpilled = onlySpilled;
|
||||
|
||||
//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("EndVer", reply.end).detail("MsgBytes", reply.messages.expectedSize()).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress());
|
||||
//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("Tag", req.tag.toString()).
|
||||
// detail("BeginVer", req.begin).detail("EndVer", reply.end).
|
||||
// detail("MsgBytes", reply.messages.expectedSize()).
|
||||
// detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress());
|
||||
|
||||
if(req.sequence.present()) {
|
||||
auto& trackerData = logData->peekTracker[peekId];
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "fdbrpc/Replication.h"
|
||||
#include "fdbrpc/ReplicationUtils.h"
|
||||
#include "fdbserver/RecoveryState.h"
|
||||
#include "fdbserver/LogProtocolMessage.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
ACTOR Future<Version> minVersionWhenReady(Future<Void> f, std::vector<Future<TLogCommitReply>> replies) {
|
||||
|
@ -690,10 +691,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
}
|
||||
|
||||
Reference<IPeekCursor> peekLocal( UID dbgid, Tag tag, Version begin, Version end, bool useMergePeekCursors, int8_t peekLocality = tagLocalityInvalid ) {
|
||||
if(tag.locality >= 0 || tag.locality == tagLocalityUpgraded) {
|
||||
if(tag.locality >= 0 || tag.locality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial) {
|
||||
peekLocality = tag.locality;
|
||||
}
|
||||
ASSERT(peekLocality >= 0 || peekLocality == tagLocalityUpgraded);
|
||||
ASSERT(peekLocality >= 0 || peekLocality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial);
|
||||
|
||||
int bestSet = -1;
|
||||
bool foundSpecial = false;
|
||||
|
@ -702,7 +703,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
if(tLogs[t]->logServers.size() && tLogs[t]->locality != tagLocalitySatellite) {
|
||||
logCount++;
|
||||
}
|
||||
if(tLogs[t]->logServers.size() && (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded)) {
|
||||
if(tLogs[t]->logServers.size() && (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded || peekLocality == tagLocalitySpecial)) {
|
||||
if( tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded ) {
|
||||
foundSpecial = true;
|
||||
}
|
||||
|
@ -757,7 +758,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
if(oldLogData[i].tLogs[t]->logServers.size() && oldLogData[i].tLogs[t]->locality != tagLocalitySatellite) {
|
||||
logCount++;
|
||||
}
|
||||
if(oldLogData[i].tLogs[t]->logServers.size() && (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || oldLogData[i].tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded)) {
|
||||
if(oldLogData[i].tLogs[t]->logServers.size() && (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || oldLogData[i].tLogs[t]->locality == peekLocality ||
|
||||
peekLocality == tagLocalityUpgraded || peekLocality == tagLocalitySpecial)) {
|
||||
if( oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded ) {
|
||||
nextFoundSpecial = true;
|
||||
}
|
||||
|
@ -783,8 +785,9 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
Version thisBegin = std::max(oldLogData[i].tLogs[bestOldSet]->startVersion, begin);
|
||||
if(thisBegin < lastBegin) {
|
||||
if(thisBegin < end) {
|
||||
TraceEvent("TLogPeekLocalAddingOldBest", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end)
|
||||
TraceEvent("TLogPeekLocalAddingOldBest", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("BestOldSet", bestOldSet)
|
||||
.detail("LogServers", oldLogData[i].tLogs[bestOldSet]->logServerString()).detail("ThisBegin", thisBegin).detail("LastBegin", lastBegin);
|
||||
//detail("LogId", oldLogData[i].tLogs[bestOldSet]->logServers[tLogs[bestOldSet]->bestLocationFor( tag )]->get().id());
|
||||
cursors.emplace_back(new ILogSystem::MergedPeekCursor( oldLogData[i].tLogs[bestOldSet]->logServers, oldLogData[i].tLogs[bestOldSet]->bestLocationFor( tag ), oldLogData[i].tLogs[bestOldSet]->logServers.size() + 1 - oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor, tag,
|
||||
thisBegin, std::min(lastBegin, end), useMergePeekCursors, oldLogData[i].tLogs[bestOldSet]->tLogLocalities, oldLogData[i].tLogs[bestOldSet]->tLogPolicy, oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor));
|
||||
epochEnds.emplace_back(std::min(lastBegin, end));
|
||||
|
|
|
@ -531,7 +531,7 @@ static int asyncOpen(
|
|||
if (flags & SQLITE_OPEN_WAL) oflags |= IAsyncFile::OPEN_LARGE_PAGES;
|
||||
oflags |= IAsyncFile::OPEN_LOCK;
|
||||
|
||||
memset(p, 0, sizeof(VFSAsyncFile));
|
||||
memset(static_cast<void*>(p), 0, sizeof(VFSAsyncFile));
|
||||
new (p) VFSAsyncFile(zName, flags);
|
||||
try {
|
||||
// Note that SQLiteDB::open also opens the db file, so its flags and modes are important, too
|
||||
|
|
|
@ -2204,6 +2204,7 @@ struct SplitStringRef {
|
|||
// A BTree "page id" is actually a list of LogicalPageID's whose contents should be concatenated together.
|
||||
// NOTE: Uses host byte order
|
||||
typedef VectorRef<LogicalPageID> BTreePageIDRef;
|
||||
constexpr LogicalPageID maxPageID = (LogicalPageID)-1;
|
||||
|
||||
std::string toString(BTreePageIDRef id) {
|
||||
return std::string("BTreePageID") + toString(id.begin(), id.end());
|
||||
|
@ -2246,6 +2247,10 @@ struct RedwoodRecordRef {
|
|||
|
||||
inline RedwoodRecordRef withoutValue() const { return RedwoodRecordRef(key, version); }
|
||||
|
||||
inline RedwoodRecordRef withMaxPageID() const {
|
||||
return RedwoodRecordRef(key, version, StringRef((uint8_t *)&maxPageID, sizeof(maxPageID)));
|
||||
}
|
||||
|
||||
// Truncate (key, version, part) tuple to len bytes.
|
||||
void truncate(int len) {
|
||||
ASSERT(len <= key.size());
|
||||
|
@ -2988,7 +2993,8 @@ public:
|
|||
|
||||
VersionedBTree(IPager2* pager, std::string name)
|
||||
: m_pager(pager), m_writeVersion(invalidVersion), m_lastCommittedVersion(invalidVersion), m_pBuffer(nullptr),
|
||||
m_name(name) {
|
||||
m_commitReadLock(SERVER_KNOBS->REDWOOD_COMMIT_CONCURRENT_READS), m_name(name) {
|
||||
|
||||
m_lazyClearActor = 0;
|
||||
m_init = init_impl(this);
|
||||
m_latestCommit = m_init;
|
||||
|
@ -3435,6 +3441,7 @@ private:
|
|||
Version m_writeVersion;
|
||||
Version m_lastCommittedVersion;
|
||||
Version m_newOldestVersion;
|
||||
FlowLock m_commitReadLock;
|
||||
Future<Void> m_latestCommit;
|
||||
Future<Void> m_init;
|
||||
std::string m_name;
|
||||
|
@ -3872,7 +3879,7 @@ private:
|
|||
// If the decode upper boundary is the subtree upper boundary the pointers will be the same
|
||||
// For the lower boundary, if the pointers are not the same there is still a possibility
|
||||
// that the keys are the same. This happens for the first remaining subtree of an internal page
|
||||
// after the previous first subtree was cleared.
|
||||
// after the prior subtree(s) were cleared.
|
||||
return (decodeUpperBound == subtreeUpperBound) &&
|
||||
(decodeLowerBound == subtreeLowerBound || decodeLowerBound->sameExceptValue(*subtreeLowerBound));
|
||||
}
|
||||
|
@ -4126,8 +4133,13 @@ private:
|
|||
}
|
||||
|
||||
state Version writeVersion = self->getLastCommittedVersion() + 1;
|
||||
|
||||
wait(self->m_commitReadLock.take());
|
||||
state FlowLock::Releaser readLock(self->m_commitReadLock);
|
||||
state Reference<const IPage> page =
|
||||
wait(readPage(snapshot, rootID, update->decodeLowerBound, update->decodeUpperBound));
|
||||
readLock.release();
|
||||
|
||||
state BTreePage* btPage = (BTreePage*)page->begin();
|
||||
ASSERT(isLeaf == btPage->isLeaf());
|
||||
g_redwoodMetrics.level(btPage->height).pageCommitStart += 1;
|
||||
|
@ -4984,6 +4996,246 @@ public:
|
|||
Future<bool> moveLast() { return move_end(this, false); }
|
||||
};
|
||||
|
||||
// Cursor designed for short lifespans.
|
||||
// Holds references to all pages touched.
|
||||
// All record references returned from it are valid until the cursor is destroyed.
|
||||
class BTreeCursor {
|
||||
Arena arena;
|
||||
Reference<IPagerSnapshot> pager;
|
||||
std::unordered_map<LogicalPageID, Reference<const IPage>> pages;
|
||||
VersionedBTree* btree;
|
||||
bool valid;
|
||||
|
||||
struct PathEntry {
|
||||
BTreePage* btPage;
|
||||
BTreePage::BinaryTree::Cursor cursor;
|
||||
};
|
||||
VectorRef<PathEntry> path;
|
||||
|
||||
public:
|
||||
BTreeCursor() {}
|
||||
|
||||
bool isValid() const { return valid; }
|
||||
|
||||
std::string toString() const {
|
||||
std::string r;
|
||||
for (int i = 0; i < path.size(); ++i) {
|
||||
r += format("[%d/%d: %s] ", i + 1, path.size(),
|
||||
path[i].cursor.valid() ? path[i].cursor.get().toString(path[i].btPage->isLeaf()).c_str()
|
||||
: "<invalid>");
|
||||
}
|
||||
if (!valid) {
|
||||
r += " (invalid) ";
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
const RedwoodRecordRef& get() { return path.back().cursor.get(); }
|
||||
|
||||
bool inRoot() const { return path.size() == 1; }
|
||||
|
||||
// Pop and return the page cursor at the end of the path.
|
||||
// This is meant to enable range scans to consume the contents of a leaf page more efficiently.
|
||||
// Can only be used when inRoot() is true.
|
||||
BTreePage::BinaryTree::Cursor popPath() {
|
||||
BTreePage::BinaryTree::Cursor c = path.back().cursor;
|
||||
path.pop_back();
|
||||
return c;
|
||||
}
|
||||
|
||||
Future<Void> pushPage(BTreePageIDRef id, const RedwoodRecordRef& lowerBound,
|
||||
const RedwoodRecordRef& upperBound) {
|
||||
Reference<const IPage>& page = pages[id.front()];
|
||||
if (page.isValid()) {
|
||||
path.push_back(arena, { (BTreePage*)page->begin(), getCursor(page) });
|
||||
return Void();
|
||||
}
|
||||
|
||||
return map(readPage(pager, id, &lowerBound, &upperBound), [this, &page, id](Reference<const IPage> p) {
|
||||
page = p;
|
||||
path.push_back(arena, { (BTreePage*)p->begin(), getCursor(p) });
|
||||
return Void();
|
||||
});
|
||||
}
|
||||
|
||||
Future<Void> pushPage(BTreePage::BinaryTree::Cursor c) {
|
||||
const RedwoodRecordRef& rec = c.get();
|
||||
auto next = c;
|
||||
next.moveNext();
|
||||
BTreePageIDRef id = rec.getChildPage();
|
||||
return pushPage(id, rec, next.getOrUpperBound());
|
||||
}
|
||||
|
||||
Future<Void> init(VersionedBTree* btree_in, Reference<IPagerSnapshot> pager_in, BTreePageIDRef root) {
|
||||
btree = btree_in;
|
||||
pager = pager_in;
|
||||
path.reserve(arena, 6);
|
||||
valid = false;
|
||||
return pushPage(root, dbBegin, dbEnd);
|
||||
}
|
||||
|
||||
// Seeks cursor to query if it exists, the record before or after it, or an undefined and invalid
|
||||
// position between those records
|
||||
// If 0 is returned, then
|
||||
// If the cursor is valid then it points to query
|
||||
// If the cursor is not valid then the cursor points to some place in the btree such that
|
||||
// If there is a record in the tree < query then movePrev() will move to it, and
|
||||
// If there is a record in the tree > query then moveNext() will move to it.
|
||||
// If non-zero is returned then the cursor is valid and the return value is logically equivalent
|
||||
// to query.compare(cursor.get())
|
||||
ACTOR Future<int> seek_impl(BTreeCursor* self, RedwoodRecordRef query, int prefetchBytes) {
|
||||
state RedwoodRecordRef internalPageQuery = query.withMaxPageID();
|
||||
self->path = self->path.slice(0, 1);
|
||||
debug_printf("seek(%s, %d) start cursor = %s\n", query.toString().c_str(), prefetchBytes,
|
||||
self->toString().c_str());
|
||||
|
||||
loop {
|
||||
auto& entry = self->path.back();
|
||||
if (entry.btPage->isLeaf()) {
|
||||
int cmp = entry.cursor.seek(query);
|
||||
self->valid = entry.cursor.valid() && !entry.cursor.node->isDeleted();
|
||||
debug_printf("seek(%s, %d) loop exit cmp=%d cursor=%s\n", query.toString().c_str(), prefetchBytes,
|
||||
cmp, self->toString().c_str());
|
||||
return self->valid ? cmp : 0;
|
||||
}
|
||||
|
||||
// Internal page, so seek to the branch where query must be
|
||||
// Currently, after a subtree deletion internal page boundaries are still strictly adhered
|
||||
// to and will be updated if anything is inserted into the cleared range, so if the seek fails
|
||||
// or it finds an entry with a null child page then query does not exist in the BTree.
|
||||
if (entry.cursor.seekLessThan(internalPageQuery) && entry.cursor.get().value.present()) {
|
||||
debug_printf("seek(%s, %d) loop seek success cursor=%s\n", query.toString().c_str(), prefetchBytes,
|
||||
self->toString().c_str());
|
||||
Future<Void> f = self->pushPage(entry.cursor);
|
||||
|
||||
// Prefetch siblings, at least prefetchBytes, at level 2 but without jumping to another level 2
|
||||
// sibling
|
||||
if (prefetchBytes != 0 && entry.btPage->height == 2) {
|
||||
auto c = entry.cursor;
|
||||
bool fwd = prefetchBytes > 0;
|
||||
prefetchBytes = abs(prefetchBytes);
|
||||
// While we should still preload more bytes and a move in the target direction is successful
|
||||
while (prefetchBytes > 0 && (fwd ? c.moveNext() : c.movePrev())) {
|
||||
// If there is a page link, preload it.
|
||||
if (c.get().value.present()) {
|
||||
BTreePageIDRef childPage = c.get().getChildPage();
|
||||
preLoadPage(self->pager.getPtr(), childPage);
|
||||
prefetchBytes -= self->btree->m_blockSize * childPage.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wait(f);
|
||||
} else {
|
||||
self->valid = false;
|
||||
debug_printf("seek(%s, %d) loop exit cmp=0 cursor=%s\n", query.toString().c_str(), prefetchBytes,
|
||||
self->toString().c_str());
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Future<int> seek(RedwoodRecordRef query, int prefetchBytes) { return seek_impl(this, query, prefetchBytes); }
|
||||
|
||||
ACTOR Future<Void> seekGTE_impl(BTreeCursor* self, RedwoodRecordRef query, int prefetchBytes) {
|
||||
debug_printf("seekGTE(%s, %d) start\n", query.toString().c_str(), prefetchBytes);
|
||||
int cmp = wait(self->seek(query, prefetchBytes));
|
||||
if (cmp > 0 || (cmp == 0 && !self->isValid())) {
|
||||
wait(self->moveNext());
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> seekGTE(RedwoodRecordRef query, int prefetchBytes) {
|
||||
return seekGTE_impl(this, query, prefetchBytes);
|
||||
}
|
||||
|
||||
ACTOR Future<Void> seekLT_impl(BTreeCursor* self, RedwoodRecordRef query, int prefetchBytes) {
|
||||
debug_printf("seekLT(%s, %d) start\n", query.toString().c_str(), prefetchBytes);
|
||||
int cmp = wait(self->seek(query, prefetchBytes));
|
||||
if (cmp <= 0) {
|
||||
wait(self->movePrev());
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> seekLT(RedwoodRecordRef query, int prefetchBytes) {
|
||||
return seekLT_impl(this, query, -prefetchBytes);
|
||||
}
|
||||
|
||||
ACTOR Future<Void> move_impl(BTreeCursor* self, bool forward) {
|
||||
// Try to the move cursor at the end of the path in the correct direction
|
||||
debug_printf("move%s() start cursor=%s\n", forward ? "Next" : "Prev", self->toString().c_str());
|
||||
while (1) {
|
||||
debug_printf("move%s() first loop cursor=%s\n", forward ? "Next" : "Prev", self->toString().c_str());
|
||||
auto& entry = self->path.back();
|
||||
bool success;
|
||||
if(entry.cursor.valid()) {
|
||||
success = forward ? entry.cursor.moveNext() : entry.cursor.movePrev();
|
||||
} else {
|
||||
success = forward ? entry.cursor.moveFirst() : false;
|
||||
}
|
||||
|
||||
// Skip over internal page entries that do not link to child pages. There should never be two in a row.
|
||||
if (success && !entry.btPage->isLeaf() && !entry.cursor.get().value.present()) {
|
||||
success = forward ? entry.cursor.moveNext() : entry.cursor.movePrev();
|
||||
ASSERT(!success || entry.cursor.get().value.present());
|
||||
}
|
||||
|
||||
// Stop if successful
|
||||
if (success) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (self->path.size() == 1) {
|
||||
self->valid = false;
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Move to parent
|
||||
self->path = self->path.slice(0, self->path.size() - 1);
|
||||
}
|
||||
|
||||
// While not on a leaf page, move down to get to one.
|
||||
while (1) {
|
||||
debug_printf("move%s() second loop cursor=%s\n", forward ? "Next" : "Prev", self->toString().c_str());
|
||||
auto& entry = self->path.back();
|
||||
if (entry.btPage->isLeaf()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// The last entry in an internal page could be a null link, if so move back
|
||||
if (!forward && !entry.cursor.get().value.present()) {
|
||||
ASSERT(entry.cursor.movePrev());
|
||||
ASSERT(entry.cursor.get().value.present());
|
||||
}
|
||||
|
||||
wait(self->pushPage(entry.cursor));
|
||||
auto& newEntry = self->path.back();
|
||||
ASSERT(forward ? newEntry.cursor.moveFirst() : newEntry.cursor.moveLast());
|
||||
}
|
||||
|
||||
self->valid = true;
|
||||
|
||||
debug_printf("move%s() exit cursor=%s\n", forward ? "Next" : "Prev", self->toString().c_str());
|
||||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> moveNext() { return move_impl(this, true); }
|
||||
Future<Void> movePrev() { return move_impl(this, false); }
|
||||
};
|
||||
|
||||
Future<Void> initBTreeCursor(BTreeCursor* cursor, Version snapshotVersion) {
|
||||
// Only committed versions can be read.
|
||||
ASSERT(snapshotVersion <= m_lastCommittedVersion);
|
||||
Reference<IPagerSnapshot> snapshot = m_pager->getReadSnapshot(snapshotVersion);
|
||||
|
||||
// This is a ref because snapshot will continue to hold the metakey value memory
|
||||
KeyRef m = snapshot->getMetaKey();
|
||||
|
||||
return cursor->init(this, snapshot, ((MetaKey*)m.begin())->root.get());
|
||||
}
|
||||
|
||||
// Cursor is for reading and interating over user visible KV pairs at a specific version
|
||||
// KeyValueRefs returned become invalid once the cursor is moved
|
||||
class Cursor : public IStoreCursor, public ReferenceCounted<Cursor>, public FastAllocated<Cursor>, NonCopyable {
|
||||
|
@ -5264,10 +5516,13 @@ public:
|
|||
|
||||
ACTOR static Future<Standalone<RangeResultRef>> readRange_impl(KeyValueStoreRedwoodUnversioned* self, KeyRange keys,
|
||||
int rowLimit, int byteLimit) {
|
||||
state VersionedBTree::BTreeCursor cur;
|
||||
wait(self->m_tree->initBTreeCursor(&cur, self->m_tree->getLastCommittedVersion()));
|
||||
|
||||
wait(self->m_concurrentReads.take());
|
||||
state FlowLock::Releaser releaser(self->m_concurrentReads);
|
||||
|
||||
++g_redwoodMetrics.opGetRange;
|
||||
|
||||
state Standalone<RangeResultRef> result;
|
||||
state int accumulatedBytes = 0;
|
||||
ASSERT(byteLimit > 0);
|
||||
|
@ -5276,33 +5531,58 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
state Reference<IStoreCursor> cur = self->m_tree->readAtVersion(self->m_tree->getLastCommittedVersion());
|
||||
// Prefetch is currently only done in the forward direction
|
||||
state int prefetchBytes = rowLimit > 1 ? byteLimit : 0;
|
||||
// Prefetch is disabled for now pending some decent logic for deciding how much to fetch
|
||||
state int prefetchBytes = 0;
|
||||
|
||||
if (rowLimit > 0) {
|
||||
wait(cur->findFirstEqualOrGreater(keys.begin, prefetchBytes));
|
||||
while (cur->isValid() && cur->getKey() < keys.end) {
|
||||
KeyValueRef kv(KeyRef(result.arena(), cur->getKey()), ValueRef(result.arena(), cur->getValue()));
|
||||
accumulatedBytes += kv.expectedSize();
|
||||
result.push_back(result.arena(), kv);
|
||||
if (--rowLimit == 0 || accumulatedBytes >= byteLimit) {
|
||||
wait(cur.seekGTE(keys.begin, prefetchBytes));
|
||||
while (cur.isValid()) {
|
||||
// Read page contents without using waits
|
||||
bool isRoot = cur.inRoot();
|
||||
BTreePage::BinaryTree::Cursor leafCursor = cur.popPath();
|
||||
while(leafCursor.valid()) {
|
||||
KeyValueRef kv = leafCursor.get().toKeyValueRef();
|
||||
if(kv.key >= keys.end) {
|
||||
break;
|
||||
}
|
||||
accumulatedBytes += kv.expectedSize();
|
||||
result.push_back_deep(result.arena(), kv);
|
||||
if (--rowLimit == 0 || accumulatedBytes >= byteLimit) {
|
||||
break;
|
||||
}
|
||||
leafCursor.moveNext();
|
||||
}
|
||||
// Stop if the leaf cursor is still valid which means we hit a key or size limit or
|
||||
// if we started in the root page
|
||||
if(leafCursor.valid() || isRoot) {
|
||||
break;
|
||||
}
|
||||
wait(cur->next());
|
||||
wait(cur.moveNext());
|
||||
}
|
||||
} else {
|
||||
wait(cur->findLastLessOrEqual(keys.end));
|
||||
if (cur->isValid() && cur->getKey() == keys.end) wait(cur->prev());
|
||||
|
||||
while (cur->isValid() && cur->getKey() >= keys.begin) {
|
||||
KeyValueRef kv(KeyRef(result.arena(), cur->getKey()), ValueRef(result.arena(), cur->getValue()));
|
||||
accumulatedBytes += kv.expectedSize();
|
||||
result.push_back(result.arena(), kv);
|
||||
if (++rowLimit == 0 || accumulatedBytes >= byteLimit) {
|
||||
wait(cur.seekLT(keys.end, prefetchBytes));
|
||||
while (cur.isValid()) {
|
||||
// Read page contents without using waits
|
||||
bool isRoot = cur.inRoot();
|
||||
BTreePage::BinaryTree::Cursor leafCursor = cur.popPath();
|
||||
while(leafCursor.valid()) {
|
||||
KeyValueRef kv = leafCursor.get().toKeyValueRef();
|
||||
if(kv.key < keys.begin) {
|
||||
break;
|
||||
}
|
||||
accumulatedBytes += kv.expectedSize();
|
||||
result.push_back_deep(result.arena(), kv);
|
||||
if (++rowLimit == 0 || accumulatedBytes >= byteLimit) {
|
||||
break;
|
||||
}
|
||||
leafCursor.movePrev();
|
||||
}
|
||||
// Stop if the leaf cursor is still valid which means we hit a key or size limit or
|
||||
// if we started in the root page
|
||||
if(leafCursor.valid() || isRoot) {
|
||||
break;
|
||||
}
|
||||
wait(cur->prev());
|
||||
wait(cur.movePrev());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5316,15 +5596,16 @@ public:
|
|||
|
||||
ACTOR static Future<Optional<Value>> readValue_impl(KeyValueStoreRedwoodUnversioned* self, Key key,
|
||||
Optional<UID> debugID) {
|
||||
state VersionedBTree::BTreeCursor cur;
|
||||
wait(self->m_tree->initBTreeCursor(&cur, self->m_tree->getLastCommittedVersion()));
|
||||
|
||||
wait(self->m_concurrentReads.take());
|
||||
state FlowLock::Releaser releaser(self->m_concurrentReads);
|
||||
|
||||
++g_redwoodMetrics.opGet;
|
||||
state Reference<IStoreCursor> cur = self->m_tree->readAtVersion(self->m_tree->getLastCommittedVersion());
|
||||
|
||||
wait(cur->findEqual(key));
|
||||
if (cur->isValid()) {
|
||||
return cur->getValue();
|
||||
wait(cur.seekGTE(key, 0));
|
||||
if (cur.isValid() && cur.get().key == key) {
|
||||
return cur.get().value.get();
|
||||
}
|
||||
return Optional<Value>();
|
||||
}
|
||||
|
@ -5335,18 +5616,20 @@ public:
|
|||
|
||||
ACTOR static Future<Optional<Value>> readValuePrefix_impl(KeyValueStoreRedwoodUnversioned* self, Key key,
|
||||
int maxLength, Optional<UID> debugID) {
|
||||
state VersionedBTree::BTreeCursor cur;
|
||||
wait(self->m_tree->initBTreeCursor(&cur, self->m_tree->getLastCommittedVersion()));
|
||||
|
||||
wait(self->m_concurrentReads.take());
|
||||
state FlowLock::Releaser releaser(self->m_concurrentReads);
|
||||
|
||||
++g_redwoodMetrics.opGet;
|
||||
state Reference<IStoreCursor> cur = self->m_tree->readAtVersion(self->m_tree->getLastCommittedVersion());
|
||||
|
||||
wait(cur->findEqual(key));
|
||||
if (cur->isValid()) {
|
||||
Value v = cur->getValue();
|
||||
wait(cur.seekGTE(key, 0));
|
||||
if (cur.isValid() && cur.get().key == key) {
|
||||
Value v = cur.get().value.get();
|
||||
int len = std::min(v.size(), maxLength);
|
||||
return Value(cur->getValue().substr(0, len));
|
||||
return Value(v.substr(0, len));
|
||||
}
|
||||
|
||||
return Optional<Value>();
|
||||
}
|
||||
|
||||
|
@ -5411,6 +5694,157 @@ KeyValue randomKV(int maxKeySize = 10, int maxValueSize = 5) {
|
|||
return kv;
|
||||
}
|
||||
|
||||
// Verify a range using a BTreeCursor.
|
||||
// Assumes that the BTree holds a single data version and the version is 0.
|
||||
ACTOR Future<int> verifyRangeBTreeCursor(VersionedBTree* btree, Key start, Key end, Version v,
|
||||
std::map<std::pair<std::string, Version>, Optional<std::string>>* written,
|
||||
int* pErrorCount) {
|
||||
state int errors = 0;
|
||||
if (end <= start) end = keyAfter(start);
|
||||
|
||||
state std::map<std::pair<std::string, Version>, Optional<std::string>>::const_iterator i =
|
||||
written->lower_bound(std::make_pair(start.toString(), 0));
|
||||
state std::map<std::pair<std::string, Version>, Optional<std::string>>::const_iterator iEnd =
|
||||
written->upper_bound(std::make_pair(end.toString(), 0));
|
||||
state std::map<std::pair<std::string, Version>, Optional<std::string>>::const_iterator iLast;
|
||||
|
||||
state VersionedBTree::BTreeCursor cur;
|
||||
wait(btree->initBTreeCursor(&cur, v));
|
||||
debug_printf("VerifyRange(@%" PRId64 ", %s, %s): Start\n", v, start.printable().c_str(), end.printable().c_str());
|
||||
|
||||
// Randomly use the cursor for something else first.
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
state Key randomKey = randomKV().key;
|
||||
debug_printf("VerifyRange(@%" PRId64 ", %s, %s): Dummy seek to '%s'\n", v, start.printable().c_str(),
|
||||
end.printable().c_str(), randomKey.toString().c_str());
|
||||
wait(success(cur.seek(randomKey, 0)));
|
||||
}
|
||||
|
||||
debug_printf("VerifyRange(@%" PRId64 ", %s, %s): Actual seek\n", v, start.printable().c_str(),
|
||||
end.printable().c_str());
|
||||
wait(cur.seekGTE(start, 0));
|
||||
|
||||
state std::vector<KeyValue> results;
|
||||
|
||||
while (cur.isValid() && cur.get().key < end) {
|
||||
// Find the next written kv pair that would be present at this version
|
||||
while (1) {
|
||||
iLast = i;
|
||||
if (i == iEnd) break;
|
||||
++i;
|
||||
|
||||
if (iLast->first.second <= v && iLast->second.present() &&
|
||||
(i == iEnd || i->first.first != iLast->first.first || i->first.second > v)) {
|
||||
debug_printf("VerifyRange(@%" PRId64 ", %s, %s) Found key in written map: %s\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), iLast->first.first.c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (iLast == iEnd) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRange(@%" PRId64 ", %s, %s) ERROR: Tree key '%s' vs nothing in written map.\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), cur.get().key.toString().c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
if (cur.get().key != iLast->first.first) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRange(@%" PRId64 ", %s, %s) ERROR: Tree key '%s' but expected '%s'\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), cur.get().key.toString().c_str(),
|
||||
iLast->first.first.c_str());
|
||||
break;
|
||||
}
|
||||
if (cur.get().value.get() != iLast->second.get()) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRange(@%" PRId64 ", %s, %s) ERROR: Tree key '%s' has tree value '%s' but expected '%s'\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), cur.get().key.toString().c_str(),
|
||||
cur.get().value.get().toString().c_str(), iLast->second.get().c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT(errors == 0);
|
||||
|
||||
results.push_back(KeyValue(KeyValueRef(cur.get().key, cur.get().value.get())));
|
||||
wait(cur.moveNext());
|
||||
}
|
||||
|
||||
// Make sure there are no further written kv pairs that would be present at this version.
|
||||
while (1) {
|
||||
iLast = i;
|
||||
if (i == iEnd) break;
|
||||
++i;
|
||||
if (iLast->first.second <= v && iLast->second.present() &&
|
||||
(i == iEnd || i->first.first != iLast->first.first || i->first.second > v))
|
||||
break;
|
||||
}
|
||||
|
||||
if (iLast != iEnd) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRange(@%" PRId64 ", %s, %s) ERROR: Tree range ended but written has @%" PRId64 " '%s'\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), iLast->first.second, iLast->first.first.c_str());
|
||||
}
|
||||
|
||||
debug_printf("VerifyRangeReverse(@%" PRId64 ", %s, %s): start\n", v, start.printable().c_str(),
|
||||
end.printable().c_str());
|
||||
|
||||
// Randomly use a new cursor at the same version for the reverse range read, if the version is still available for
|
||||
// opening new cursors
|
||||
if (v >= btree->getOldestVersion() && deterministicRandom()->coinflip()) {
|
||||
cur = VersionedBTree::BTreeCursor();
|
||||
wait(btree->initBTreeCursor(&cur, v));
|
||||
}
|
||||
|
||||
// Now read the range from the tree in reverse order and compare to the saved results
|
||||
wait(cur.seekLT(end, 0));
|
||||
|
||||
state std::vector<KeyValue>::const_reverse_iterator r = results.rbegin();
|
||||
|
||||
while (cur.isValid() && cur.get().key >= start) {
|
||||
if (r == results.rend()) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRangeReverse(@%" PRId64 ", %s, %s) ERROR: Tree key '%s' vs nothing in written map.\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), cur.get().key.toString().c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
if (cur.get().key != r->key) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRangeReverse(@%" PRId64 ", %s, %s) ERROR: Tree key '%s' but expected '%s'\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), cur.get().key.toString().c_str(),
|
||||
r->key.toString().c_str());
|
||||
break;
|
||||
}
|
||||
if (cur.get().value.get() != r->value) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRangeReverse(@%" PRId64
|
||||
", %s, %s) ERROR: Tree key '%s' has tree value '%s' but expected '%s'\n",
|
||||
v, start.printable().c_str(), end.printable().c_str(), cur.get().key.toString().c_str(),
|
||||
cur.get().value.get().toString().c_str(), r->value.toString().c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
++r;
|
||||
wait(cur.movePrev());
|
||||
}
|
||||
|
||||
if (r != results.rend()) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("VerifyRangeReverse(@%" PRId64 ", %s, %s) ERROR: Tree range ended but written has '%s'\n", v,
|
||||
start.printable().c_str(), end.printable().c_str(), r->key.toString().c_str());
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
ACTOR Future<int> verifyRange(VersionedBTree* btree, Key start, Key end, Version v,
|
||||
std::map<std::pair<std::string, Version>, Optional<std::string>>* written,
|
||||
int* pErrorCount) {
|
||||
|
@ -5607,6 +6041,58 @@ ACTOR Future<int> seekAll(VersionedBTree* btree, Version v,
|
|||
return errors;
|
||||
}
|
||||
|
||||
// Verify the result of point reads for every set or cleared key at the given version
|
||||
ACTOR Future<int> seekAllBTreeCursor(VersionedBTree* btree, Version v,
|
||||
std::map<std::pair<std::string, Version>, Optional<std::string>>* written, int* pErrorCount) {
|
||||
state std::map<std::pair<std::string, Version>, Optional<std::string>>::const_iterator i = written->cbegin();
|
||||
state std::map<std::pair<std::string, Version>, Optional<std::string>>::const_iterator iEnd = written->cend();
|
||||
state int errors = 0;
|
||||
state VersionedBTree::BTreeCursor cur;
|
||||
|
||||
wait(btree->initBTreeCursor(&cur, v));
|
||||
|
||||
while (i != iEnd) {
|
||||
state std::string key = i->first.first;
|
||||
state Version ver = i->first.second;
|
||||
if (ver == v) {
|
||||
state Optional<std::string> val = i->second;
|
||||
debug_printf("Verifying @%" PRId64 " '%s'\n", ver, key.c_str());
|
||||
state Arena arena;
|
||||
wait(cur.seekGTE(RedwoodRecordRef(KeyRef(arena, key), 0), 0));
|
||||
bool foundKey = cur.isValid() && cur.get().key == key;
|
||||
bool hasValue = foundKey && cur.get().value.present();
|
||||
|
||||
if (val.present()) {
|
||||
bool valueMatch = hasValue && cur.get().value.get() == val.get();
|
||||
if (!foundKey || !hasValue || !valueMatch) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
if (!foundKey) {
|
||||
printf("Verify ERROR: key_not_found: '%s' -> '%s' @%" PRId64 "\n", key.c_str(),
|
||||
val.get().c_str(), ver);
|
||||
}
|
||||
else if (!hasValue) {
|
||||
printf("Verify ERROR: value_not_found: '%s' -> '%s' @%" PRId64 "\n", key.c_str(),
|
||||
val.get().c_str(), ver);
|
||||
}
|
||||
else if (!valueMatch) {
|
||||
printf("Verify ERROR: value_incorrect: for '%s' found '%s' expected '%s' @%" PRId64 "\n",
|
||||
key.c_str(), cur.get().value.get().toString().c_str(), val.get().c_str(),
|
||||
ver);
|
||||
}
|
||||
}
|
||||
} else if (foundKey && hasValue) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("Verify ERROR: cleared_key_found: '%s' -> '%s' @%" PRId64 "\n", key.c_str(),
|
||||
cur.get().value.get().toString().c_str(), ver);
|
||||
}
|
||||
}
|
||||
++i;
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> verify(VersionedBTree* btree, FutureStream<Version> vStream,
|
||||
std::map<std::pair<std::string, Version>, Optional<std::string>>* written, int* pErrorCount,
|
||||
bool serial) {
|
||||
|
@ -5637,7 +6123,13 @@ ACTOR Future<Void> verify(VersionedBTree* btree, FutureStream<Version> vStream,
|
|||
state Reference<IStoreCursor> cur = btree->readAtVersion(v);
|
||||
|
||||
debug_printf("Verifying entire key range at version %" PRId64 "\n", v);
|
||||
fRangeAll = verifyRange(btree, LiteralStringRef(""), LiteralStringRef("\xff\xff"), v, written, pErrorCount);
|
||||
if(deterministicRandom()->coinflip()) {
|
||||
fRangeAll = verifyRange(btree, LiteralStringRef(""), LiteralStringRef("\xff\xff"), v, written,
|
||||
pErrorCount);
|
||||
} else {
|
||||
fRangeAll = verifyRangeBTreeCursor(btree, LiteralStringRef(""), LiteralStringRef("\xff\xff"), v, written,
|
||||
pErrorCount);
|
||||
}
|
||||
if (serial) {
|
||||
wait(success(fRangeAll));
|
||||
}
|
||||
|
@ -5646,13 +6138,21 @@ ACTOR Future<Void> verify(VersionedBTree* btree, FutureStream<Version> vStream,
|
|||
Key end = randomKV().key;
|
||||
debug_printf("Verifying range (%s, %s) at version %" PRId64 "\n", toString(begin).c_str(),
|
||||
toString(end).c_str(), v);
|
||||
fRangeRandom = verifyRange(btree, begin, end, v, written, pErrorCount);
|
||||
if(deterministicRandom()->coinflip()) {
|
||||
fRangeRandom = verifyRange(btree, begin, end, v, written, pErrorCount);
|
||||
} else {
|
||||
fRangeRandom = verifyRangeBTreeCursor(btree, begin, end, v, written, pErrorCount);
|
||||
}
|
||||
if (serial) {
|
||||
wait(success(fRangeRandom));
|
||||
}
|
||||
|
||||
debug_printf("Verifying seeks to each changed key at version %" PRId64 "\n", v);
|
||||
fSeekAll = seekAll(btree, v, written, pErrorCount);
|
||||
if(deterministicRandom()->coinflip()) {
|
||||
fSeekAll = seekAll(btree, v, written, pErrorCount);
|
||||
} else {
|
||||
fSeekAll = seekAllBTreeCursor(btree, v, written, pErrorCount);
|
||||
}
|
||||
if (serial) {
|
||||
wait(success(fSeekAll));
|
||||
}
|
||||
|
@ -6485,11 +6985,11 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
state int maxKeySize = deterministicRandom()->randomInt(1, pageSize * 2);
|
||||
state int maxValueSize = randomSize(pageSize * 25);
|
||||
state int maxCommitSize = shortTest ? 1000 : randomSize(std::min<int>((maxKeySize + maxValueSize) * 20000, 10e6));
|
||||
state int mutationBytesTarget = shortTest ? 100000 : randomSize(std::min<int>(maxCommitSize * 100, 100e6));
|
||||
state int mutationBytesTarget = shortTest ? 100000 : randomSize(std::min<int>(maxCommitSize * 100, pageSize * 100000));
|
||||
state double clearProbability = deterministicRandom()->random01() * .1;
|
||||
state double clearSingleKeyProbability = deterministicRandom()->random01();
|
||||
state double clearPostSetProbability = deterministicRandom()->random01() * .1;
|
||||
state double coldStartProbability = pagerMemoryOnly ? 0 : deterministicRandom()->random01();
|
||||
state double coldStartProbability = pagerMemoryOnly ? 0 : (deterministicRandom()->random01() * 0.3);
|
||||
state double advanceOldVersionProbability = deterministicRandom()->random01();
|
||||
state double maxDuration = 60;
|
||||
state int64_t cacheSizeBytes =
|
||||
|
|
|
@ -163,14 +163,13 @@ struct RegisterWorkerReply {
|
|||
constexpr static FileIdentifier file_identifier = 16475696;
|
||||
ProcessClass processClass;
|
||||
ClusterControllerPriorityInfo priorityInfo;
|
||||
Optional<uint16_t> storageCache;
|
||||
|
||||
RegisterWorkerReply() : priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
|
||||
RegisterWorkerReply(ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, Optional<uint16_t> storageCache) : processClass(processClass), priorityInfo(priorityInfo), storageCache(storageCache) {}
|
||||
RegisterWorkerReply(ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo) : processClass(processClass), priorityInfo(priorityInfo) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, processClass, priorityInfo, storageCache);
|
||||
serializer(ar, processClass, priorityInfo);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -302,19 +301,18 @@ struct RegisterWorkerRequest {
|
|||
Generation generation;
|
||||
Optional<DataDistributorInterface> distributorInterf;
|
||||
Optional<RatekeeperInterface> ratekeeperInterf;
|
||||
Optional<std::pair<uint16_t,StorageServerInterface>> storageCacheInterf;
|
||||
Standalone<VectorRef<StringRef>> issues;
|
||||
std::vector<NetworkAddress> incompatiblePeers;
|
||||
ReplyPromise<RegisterWorkerReply> reply;
|
||||
bool degraded;
|
||||
|
||||
RegisterWorkerRequest() : priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown), degraded(false) {}
|
||||
RegisterWorkerRequest(WorkerInterface wi, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, Generation generation, Optional<DataDistributorInterface> ddInterf, Optional<RatekeeperInterface> rkInterf, Optional<std::pair<uint16_t,StorageServerInterface>> storageCacheInterf, bool degraded) :
|
||||
wi(wi), initialClass(initialClass), processClass(processClass), priorityInfo(priorityInfo), generation(generation), distributorInterf(ddInterf), ratekeeperInterf(rkInterf), storageCacheInterf(storageCacheInterf), degraded(degraded) {}
|
||||
RegisterWorkerRequest(WorkerInterface wi, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, Generation generation, Optional<DataDistributorInterface> ddInterf, Optional<RatekeeperInterface> rkInterf, bool degraded) :
|
||||
wi(wi), initialClass(initialClass), processClass(processClass), priorityInfo(priorityInfo), generation(generation), distributorInterf(ddInterf), ratekeeperInterf(rkInterf), degraded(degraded) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, wi, initialClass, processClass, priorityInfo, generation, distributorInterf, ratekeeperInterf, storageCacheInterf, issues, incompatiblePeers, reply, degraded);
|
||||
serializer(ar, wi, initialClass, processClass, priorityInfo, generation, distributorInterf, ratekeeperInterf, issues, incompatiblePeers, reply, degraded);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -712,7 +710,7 @@ ACTOR Future<Void> logRouter(TLogInterface interf, InitializeLogRouterRequest re
|
|||
Reference<AsyncVar<ServerDBInfo>> db);
|
||||
ACTOR Future<Void> dataDistributor(DataDistributorInterface ddi, Reference<AsyncVar<ServerDBInfo>> db);
|
||||
ACTOR Future<Void> ratekeeper(RatekeeperInterface rki, Reference<AsyncVar<ServerDBInfo>> db);
|
||||
ACTOR Future<Void> storageCache(StorageServerInterface interf, uint16_t id, Reference<AsyncVar<ServerDBInfo>> db);
|
||||
ACTOR Future<Void> storageCacheServer(StorageServerInterface interf, uint16_t id, Reference<AsyncVar<ServerDBInfo>> db);
|
||||
ACTOR Future<Void> backupWorker(BackupInterface bi, InitializeBackupRequest req, Reference<AsyncVar<ServerDBInfo>> db);
|
||||
|
||||
void registerThreadForProfiling();
|
||||
|
|
|
@ -20,6 +20,9 @@
|
|||
|
||||
// There's something in one of the files below that defines a macros
|
||||
// a macro that makes boost interprocess break on Windows.
|
||||
#include "flow/Tracing.h"
|
||||
#include <cctype>
|
||||
#include <iterator>
|
||||
#define BOOST_DATE_TIME_NO_LIB
|
||||
#include <boost/interprocess/managed_shared_memory.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
@ -78,7 +81,7 @@
|
|||
|
||||
// clang-format off
|
||||
enum {
|
||||
OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_NEWCONSOLE,
|
||||
OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_TRACER, OPT_NEWCONSOLE,
|
||||
OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RESTORING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_CACHEMEMLIMIT, OPT_MACHINEID,
|
||||
OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACECLOCK,
|
||||
OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE,
|
||||
|
@ -111,6 +114,7 @@ CSimpleOpt::SOption g_rgOptions[] = {
|
|||
{ OPT_MAXLOGSSIZE, "--maxlogssize", SO_REQ_SEP },
|
||||
{ OPT_LOGGROUP, "--loggroup", SO_REQ_SEP },
|
||||
{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
|
||||
{ OPT_TRACER, "--tracer", SO_REQ_SEP },
|
||||
#ifdef _WIN32
|
||||
{ OPT_NEWCONSOLE, "-n", SO_NONE },
|
||||
{ OPT_NEWCONSOLE, "--newconsole", SO_NONE },
|
||||
|
@ -514,6 +518,9 @@ static void printUsage( const char *name, bool devhelp ) {
|
|||
printf(" --trace_format FORMAT\n"
|
||||
" Select the format of the log files. xml (the default) and json\n"
|
||||
" are supported.\n");
|
||||
printf(" --tracer TRACER\n"
|
||||
" Select a tracer for transaction tracing. Currently disabled\n"
|
||||
" (the default) and log_file are supported.\n");
|
||||
printf(" -i ID, --machine_id ID\n"
|
||||
" Machine and zone identifier key (up to 16 hex characters).\n"
|
||||
" Defaults to a random value shared by all fdbserver processes\n"
|
||||
|
@ -884,7 +891,7 @@ struct CLIOptions {
|
|||
double fileIoTimeout = 0.0;
|
||||
bool fileIoWarnOnly = false;
|
||||
uint64_t rsssize = -1;
|
||||
std::vector<std::string> blobCredentials; // used for fast restore workers
|
||||
std::vector<std::string> blobCredentials; // used for fast restore workers & backup workers
|
||||
const char* blobCredsFromENV = nullptr;
|
||||
|
||||
Reference<ClusterConnectionFile> connectionFile;
|
||||
|
@ -1169,6 +1176,22 @@ private:
|
|||
break;
|
||||
}
|
||||
#endif
|
||||
case OPT_TRACER:
|
||||
{
|
||||
std::string arg = args.OptionArg();
|
||||
std::string tracer;
|
||||
std::transform(arg.begin(), arg.end(), std::back_inserter(tracer), [](char c) { return tolower(c); });
|
||||
if (tracer == "none" || tracer == "disabled") {
|
||||
openTracer(TracerType::DISABLED);
|
||||
} else if (tracer == "logfile" || tracer == "file" || tracer == "log_file") {
|
||||
openTracer(TracerType::LOG_FILE);
|
||||
} else {
|
||||
fprintf(stderr, "ERROR: Unknown or unsupported tracer: `%s'", args.OptionArg());
|
||||
printHelpTeaser(argv[0]);
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OPT_TESTFILE:
|
||||
testFile = args.OptionArg();
|
||||
break;
|
||||
|
@ -1789,6 +1812,16 @@ int main(int argc, char* argv[]) {
|
|||
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths);
|
||||
g_simulator.run();
|
||||
} else if (role == FDBD) {
|
||||
// Update the global blob credential files list so that both fast
|
||||
// restore workers and backup workers can access blob storage.
|
||||
std::vector<std::string>* pFiles =
|
||||
(std::vector<std::string>*)g_network->global(INetwork::enBlobCredentialFiles);
|
||||
if (pFiles != nullptr) {
|
||||
for (auto& f : opts.blobCredentials) {
|
||||
pFiles->push_back(f);
|
||||
}
|
||||
}
|
||||
|
||||
// Call fast restore for the class FastRestoreClass. This is a short-cut to run fast restore in circus
|
||||
if (opts.processClass == ProcessClass::FastRestoreClass) {
|
||||
printf("Run as fast restore worker\n");
|
||||
|
@ -1797,15 +1830,6 @@ int main(int argc, char* argv[]) {
|
|||
if (!dataFolder.size())
|
||||
dataFolder = format("fdb/%d/", opts.publicAddresses.address.port); // SOMEDAY: Better default
|
||||
|
||||
// Update the global blob credential files list
|
||||
std::vector<std::string>* pFiles =
|
||||
(std::vector<std::string>*)g_network->global(INetwork::enBlobCredentialFiles);
|
||||
if (pFiles != nullptr) {
|
||||
for (auto& f : opts.blobCredentials) {
|
||||
pFiles->push_back(f);
|
||||
}
|
||||
}
|
||||
|
||||
vector<Future<Void>> actors(listenErrors.begin(), listenErrors.end());
|
||||
actors.push_back(restoreWorker(opts.connectionFile, opts.localities, dataFolder));
|
||||
f = stopAfter(waitForAll(actors));
|
||||
|
|
|
@ -53,8 +53,12 @@ struct ProxyVersionReplies {
|
|||
std::map<uint64_t, GetCommitVersionReply> replies;
|
||||
NotifiedVersion latestRequestNum;
|
||||
|
||||
ProxyVersionReplies(ProxyVersionReplies&& r) BOOST_NOEXCEPT : replies(std::move(r.replies)), latestRequestNum(std::move(r.latestRequestNum)) {}
|
||||
void operator=(ProxyVersionReplies&& r) BOOST_NOEXCEPT { replies = std::move(r.replies); latestRequestNum = std::move(r.latestRequestNum); }
|
||||
ProxyVersionReplies(ProxyVersionReplies&& r) noexcept
|
||||
: replies(std::move(r.replies)), latestRequestNum(std::move(r.latestRequestNum)) {}
|
||||
void operator=(ProxyVersionReplies&& r) noexcept {
|
||||
replies = std::move(r.replies);
|
||||
latestRequestNum = std::move(r.latestRequestNum);
|
||||
}
|
||||
|
||||
ProxyVersionReplies() : latestRequestNum(0) {}
|
||||
};
|
||||
|
@ -157,7 +161,7 @@ private:
|
|||
} else {
|
||||
self->fullyRecovered.send(Void());
|
||||
}
|
||||
|
||||
|
||||
return Void();
|
||||
}
|
||||
};
|
||||
|
@ -379,7 +383,7 @@ ACTOR Future<Void> newSeedServers( Reference<MasterData> self, RecruitFromConfig
|
|||
dcId_tags[recruits.storageServers[idx].locality.dcId()] = Tag(nextLocality, 0);
|
||||
nextLocality++;
|
||||
}
|
||||
|
||||
|
||||
Tag& tag = dcId_tags[recruits.storageServers[idx].locality.dcId()];
|
||||
tag.id++;
|
||||
idx++;
|
||||
|
@ -588,7 +592,7 @@ ACTOR Future<vector<Standalone<CommitTransactionRef>>> recruitEverything( Refere
|
|||
.detail("DesiredResolvers", self->configuration.getDesiredResolvers())
|
||||
.detail("StoreType", self->configuration.storageServerStoreType)
|
||||
.trackLatest("MasterRecoveryState");
|
||||
|
||||
|
||||
//FIXME: we only need log routers for the same locality as the master
|
||||
int maxLogRouters = self->cstate.prevDBState.logRouterTags;
|
||||
for(auto& old : self->cstate.prevDBState.oldTLogData) {
|
||||
|
@ -917,6 +921,7 @@ ACTOR Future<Void> recoverFrom( Reference<MasterData> self, Reference<ILogSystem
|
|||
}
|
||||
|
||||
ACTOR Future<Void> getVersion(Reference<MasterData> self, GetCommitVersionRequest req) {
|
||||
state Span span("M:getVersion"_loc, { req.spanContext });
|
||||
state std::map<UID, ProxyVersionReplies>::iterator proxyItr = self->lastProxyVersionReplies.find(req.requestingProxy); // lastProxyVersionReplies never changes
|
||||
|
||||
if (proxyItr == self->lastProxyVersionReplies.end()) {
|
||||
|
@ -1539,15 +1544,6 @@ ACTOR Future<Void> masterCore( Reference<MasterData> self ) {
|
|||
tr.set(recoveryCommitRequest.arena, coordinatorsKey, self->coordinators.ccf->getConnectionString().toString());
|
||||
tr.set(recoveryCommitRequest.arena, logsKey, self->logSystem->getLogsValue());
|
||||
tr.set(recoveryCommitRequest.arena, primaryDatacenterKey, self->myInterface.locality.dcId().present() ? self->myInterface.locality.dcId().get() : StringRef());
|
||||
|
||||
//FIXME: remove this code, caching the entire normal keyspace as a test of functionality
|
||||
//TODO: caching disabled for this merge
|
||||
//tr.set(recoveryCommitRequest.arena, storageCacheKey(normalKeys.begin), storageCacheValue({0}));
|
||||
//tr.set(recoveryCommitRequest.arena, storageCacheKey(normalKeys.end), storageCacheValue({}));
|
||||
//tr.set(recoveryCommitRequest.arena, cacheKeysKey(0, normalKeys.begin), serverKeysTrue);
|
||||
//tr.set(recoveryCommitRequest.arena, cacheKeysKey(0, normalKeys.end), serverKeysFalse);
|
||||
//tr.set(recoveryCommitRequest.arena, cacheChangeKeyFor(0), BinaryWriter::toValue(deterministicRandom()->randomUniqueID(),Unversioned()));
|
||||
//tr.set(recoveryCommitRequest.arena, cacheChangeKey, BinaryWriter::toValue(deterministicRandom()->randomUniqueID(),Unversioned()));
|
||||
|
||||
tr.clear(recoveryCommitRequest.arena, tLogDatacentersKeys);
|
||||
for(auto& dc : self->primaryDcId) {
|
||||
|
@ -1689,7 +1685,7 @@ ACTOR Future<Void> masterServer( MasterInterface mi, Reference<AsyncVar<ServerDB
|
|||
while(!self->addActor.isEmpty()) {
|
||||
self->addActor.getFuture().pop();
|
||||
}
|
||||
|
||||
|
||||
TEST(err.code() == error_code_master_tlog_failed); // Master: terminated because of a tLog failure
|
||||
TEST(err.code() == error_code_master_proxy_failed); // Master: terminated because of a proxy failure
|
||||
TEST(err.code() == error_code_master_resolver_failed); // Master: terminated because of a resolver failure
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
#include <cinttypes>
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
#include "fdbrpc/LoadBalance.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/Tracing.h"
|
||||
#include "flow/IndexedSet.h"
|
||||
#include "flow/Hash3.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
|
@ -712,7 +715,7 @@ public:
|
|||
}
|
||||
|
||||
template<class Request, class HandleFunction>
|
||||
Future<Void> readGuard(const Request& request, const HandleFunction& fun) {
|
||||
Future<Void> readGuard(const Span& parentSpan, const Request& request, const HandleFunction& fun) {
|
||||
auto rate = currentRate();
|
||||
if (rate < SERVER_KNOBS->STORAGE_DURABILITY_LAG_REJECT_THRESHOLD && deterministicRandom()->random01() > std::max(SERVER_KNOBS->STORAGE_DURABILITY_LAG_MIN_RATE, rate/SERVER_KNOBS->STORAGE_DURABILITY_LAG_REJECT_THRESHOLD)) {
|
||||
//request.error = future_version();
|
||||
|
@ -720,7 +723,7 @@ public:
|
|||
++counters.readsRejected;
|
||||
return Void();
|
||||
}
|
||||
return fun(this, request);
|
||||
return fun(this, request, parentSpan);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -846,7 +849,8 @@ updateProcessStats(StorageServer* self)
|
|||
#pragma region Queries
|
||||
#endif
|
||||
|
||||
ACTOR Future<Version> waitForVersionActor(StorageServer* data, Version version) {
|
||||
ACTOR Future<Version> waitForVersionActor(StorageServer* data, Version version, SpanID spanContext) {
|
||||
state Span span("SS.WaitForVersion"_loc, { spanContext });
|
||||
choose {
|
||||
when(wait(data->version.whenAtLeast(version))) {
|
||||
// FIXME: A bunch of these can block with or without the following delay 0.
|
||||
|
@ -865,7 +869,7 @@ ACTOR Future<Version> waitForVersionActor(StorageServer* data, Version version)
|
|||
}
|
||||
}
|
||||
|
||||
Future<Version> waitForVersion(StorageServer* data, Version version) {
|
||||
Future<Version> waitForVersion(StorageServer* data, Version version, SpanID spanContext) {
|
||||
if (version == latestVersion) {
|
||||
version = std::max(Version(1), data->version.get());
|
||||
}
|
||||
|
@ -883,7 +887,7 @@ Future<Version> waitForVersion(StorageServer* data, Version version) {
|
|||
if (deterministicRandom()->random01() < 0.001) {
|
||||
TraceEvent("WaitForVersion1000x");
|
||||
}
|
||||
return waitForVersionActor(data, version);
|
||||
return waitForVersionActor(data, version, spanContext);
|
||||
}
|
||||
|
||||
ACTOR Future<Version> waitForVersionNoTooOld( StorageServer* data, Version version ) {
|
||||
|
@ -907,7 +911,7 @@ ACTOR Future<Version> waitForVersionNoTooOld( StorageServer* data, Version versi
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
|
||||
ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req, Span span ) {
|
||||
state int64_t resultSize = 0;
|
||||
|
||||
try {
|
||||
|
@ -924,7 +928,7 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
|
|||
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "getValueQ.DoRead"); //.detail("TaskID", g_network->getCurrentTask());
|
||||
|
||||
state Optional<Value> v;
|
||||
state Version version = wait( waitForVersion( data, req.version ) );
|
||||
state Version version = wait( waitForVersion( data, req.version, req.spanContext ) );
|
||||
if( req.debugID.present() )
|
||||
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "getValueQ.AfterVersion"); //.detail("TaskID", g_network->getCurrentTask());
|
||||
|
||||
|
@ -982,7 +986,12 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
|
|||
if( req.debugID.present() )
|
||||
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "getValueQ.AfterRead"); //.detail("TaskID", g_network->getCurrentTask());
|
||||
|
||||
GetValueReply reply(v);
|
||||
// Check if the desired key might be cached
|
||||
auto cached = data->cachedRangeMap[req.key];
|
||||
//if (cached)
|
||||
// TraceEvent(SevDebug, "SSGetValueCached").detail("Key", req.key);
|
||||
|
||||
GetValueReply reply(v, cached);
|
||||
reply.penalty = data->getPenalty();
|
||||
req.reply.send(reply);
|
||||
} catch (Error& e) {
|
||||
|
@ -1003,7 +1012,8 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
|
|||
return Void();
|
||||
};
|
||||
|
||||
ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req ) {
|
||||
ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req, SpanID parent ) {
|
||||
state Span span("SS:WatchValueImpl"_loc, { parent });
|
||||
try {
|
||||
++data->counters.watchQueries;
|
||||
|
||||
|
@ -1018,9 +1028,11 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req )
|
|||
try {
|
||||
state Version latest = data->data().latestVersion;
|
||||
state Future<Void> watchFuture = data->watches.onChange(req.key);
|
||||
GetValueRequest getReq( req.key, latest, req.tags, req.debugID );
|
||||
state Future<Void> getValue = getValueQ( data, getReq ); //we are relying on the delay zero at the top of getValueQ, if removed we need one here
|
||||
state Span getValueSpan(deterministicRandom()->randomUniqueID(), "SS:GetValue"_loc, { span->context });
|
||||
GetValueRequest getReq( getValueSpan->context, req.key, latest, req.tags, req.debugID );
|
||||
state Future<Void> getValue = getValueQ( data, getReq, span ); //we are relying on the delay zero at the top of getValueQ, if removed we need one here
|
||||
GetValueReply reply = wait( getReq.reply.getFuture() );
|
||||
getValueSpan.reset();
|
||||
//TraceEvent("WatcherCheckValue").detail("Key", req.key ).detail("Value", req.value ).detail("CurrentValue", v ).detail("Ver", latest);
|
||||
|
||||
if(reply.error.present()) {
|
||||
|
@ -1067,8 +1079,8 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req )
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> watchValueQ( StorageServer* data, WatchValueRequest req ) {
|
||||
state Future<Void> watch = watchValue_impl( data, req );
|
||||
ACTOR Future<Void> watchValueQ( StorageServer* data, WatchValueRequest req, Span span ) {
|
||||
state Future<Void> watch = watchValue_impl( data, req, span->context );
|
||||
state double startTime = now();
|
||||
|
||||
loop {
|
||||
|
@ -1173,7 +1185,7 @@ void merge( Arena& arena, VectorRef<KeyValueRef, VecSerStrategy::String>& output
|
|||
|
||||
// If limit>=0, it returns the first rows in the range (sorted ascending), otherwise the last rows (sorted descending).
|
||||
// readRange has O(|result|) + O(log |data|) cost
|
||||
ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version, KeyRange range, int limit, int* pLimitBytes ) {
|
||||
ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version, KeyRange range, int limit, int* pLimitBytes, Span parentSpan ) {
|
||||
state GetKeyValuesReply result;
|
||||
state StorageServer::VersionedData::ViewAtVersion view = data->data().at(version);
|
||||
state StorageServer::VersionedData::iterator vCurrent = view.end();
|
||||
|
@ -1181,6 +1193,7 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
state KeyRef readEnd;
|
||||
state Key readBeginTemp;
|
||||
state int vCount = 0;
|
||||
state Span span("SS:readRange"_loc, parentSpan);
|
||||
|
||||
// for caching the storage queue results during the first PTree traversal
|
||||
state VectorRef<KeyValueRef> resultCache;
|
||||
|
@ -1190,11 +1203,14 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
state int pos = 0;
|
||||
|
||||
|
||||
// Check if the desired key-range intersects the cached key-ranges
|
||||
// TODO Find a more efficient way to do it
|
||||
// TODO Also need this check in single key/value lookup
|
||||
auto cached = data->cachedRangeMap.intersectingRanges(range);
|
||||
result.cached = (cached.begin() != cached.end());
|
||||
// Check if the desired key-range is cached
|
||||
auto containingRange = data->cachedRangeMap.rangeContaining(range.begin);
|
||||
if (containingRange.value() && containingRange->range().end >= range.end) {
|
||||
//TraceEvent(SevDebug, "SSReadRangeCached").detail("Size",data->cachedRangeMap.size()).detail("ContainingRangeBegin",containingRange->range().begin).detail("ContainingRangeEnd",containingRange->range().end).
|
||||
// detail("Begin", range.begin).detail("End",range.end);
|
||||
result.cached = true;
|
||||
} else
|
||||
result.cached = false;
|
||||
|
||||
// if (limit >= 0) we are reading forward, else backward
|
||||
if (limit >= 0) {
|
||||
|
@ -1349,7 +1365,7 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
// return sel.getKey() >= range.begin && (sel.isBackward() ? sel.getKey() <= range.end : sel.getKey() < range.end);
|
||||
//}
|
||||
|
||||
ACTOR Future<Key> findKey( StorageServer* data, KeySelectorRef sel, Version version, KeyRange range, int* pOffset)
|
||||
ACTOR Future<Key> findKey( StorageServer* data, KeySelectorRef sel, Version version, KeyRange range, int* pOffset, SpanID parentSpan)
|
||||
// Attempts to find the key indicated by sel in the data at version, within range.
|
||||
// Precondition: selectorInRange(sel, range)
|
||||
// If it is found, offset is set to 0 and a key is returned which falls inside range.
|
||||
|
@ -1366,6 +1382,7 @@ ACTOR Future<Key> findKey( StorageServer* data, KeySelectorRef sel, Version vers
|
|||
state int sign = forward ? +1 : -1;
|
||||
state bool skipEqualKey = sel.orEqual == forward;
|
||||
state int distance = forward ? sel.offset : 1-sel.offset;
|
||||
state Span span("SS.findKey"_loc, { parentSpan });
|
||||
|
||||
//Don't limit the number of bytes if this is a trivial key selector (there will be at most two items returned from the read range in this case)
|
||||
state int maxBytes;
|
||||
|
@ -1374,14 +1391,18 @@ ACTOR Future<Key> findKey( StorageServer* data, KeySelectorRef sel, Version vers
|
|||
else
|
||||
maxBytes = BUGGIFY ? SERVER_KNOBS->BUGGIFY_LIMIT_BYTES : SERVER_KNOBS->STORAGE_LIMIT_BYTES;
|
||||
|
||||
state GetKeyValuesReply rep = wait( readRange( data, version, forward ? KeyRangeRef(sel.getKey(), range.end) : KeyRangeRef(range.begin, keyAfter(sel.getKey())), (distance + skipEqualKey)*sign, &maxBytes ) );
|
||||
state GetKeyValuesReply rep = wait(
|
||||
readRange(data, version,
|
||||
forward ? KeyRangeRef(sel.getKey(), range.end) : KeyRangeRef(range.begin, keyAfter(sel.getKey())),
|
||||
(distance + skipEqualKey) * sign, &maxBytes, span));
|
||||
state bool more = rep.more && rep.data.size() != distance + skipEqualKey;
|
||||
|
||||
//If we get only one result in the reverse direction as a result of the data being too large, we could get stuck in a loop
|
||||
if(more && !forward && rep.data.size() == 1) {
|
||||
TEST(true); //Reverse key selector returned only one result in range read
|
||||
maxBytes = std::numeric_limits<int>::max();
|
||||
GetKeyValuesReply rep2 = wait( readRange( data, version, KeyRangeRef(range.begin, keyAfter(sel.getKey())), -2, &maxBytes ) );
|
||||
GetKeyValuesReply rep2 =
|
||||
wait(readRange(data, version, KeyRangeRef(range.begin, keyAfter(sel.getKey())), -2, &maxBytes, span));
|
||||
rep = rep2;
|
||||
more = rep.more && rep.data.size() != distance + skipEqualKey;
|
||||
ASSERT(rep.data.size() == 2 || !more);
|
||||
|
@ -1436,7 +1457,7 @@ KeyRange getShardKeyRange( StorageServer* data, const KeySelectorRef& sel )
|
|||
return i->range();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
|
||||
ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req, Span span )
|
||||
// Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large selector offset prevents
|
||||
// all data from being read in one range read
|
||||
{
|
||||
|
@ -1461,7 +1482,7 @@ ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
|
|||
try {
|
||||
if( req.debugID.present() )
|
||||
g_traceBatch.addEvent("TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValues.Before");
|
||||
state Version version = wait( waitForVersion( data, req.version ) );
|
||||
state Version version = wait( waitForVersion( data, req.version, span->context ) );
|
||||
|
||||
state uint64_t changeCounter = data->shardChangeCounter;
|
||||
// try {
|
||||
|
@ -1479,8 +1500,8 @@ ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
|
|||
|
||||
state int offset1;
|
||||
state int offset2;
|
||||
state Future<Key> fBegin = req.begin.isFirstGreaterOrEqual() ? Future<Key>(req.begin.getKey()) : findKey( data, req.begin, version, shard, &offset1 );
|
||||
state Future<Key> fEnd = req.end.isFirstGreaterOrEqual() ? Future<Key>(req.end.getKey()) : findKey( data, req.end, version, shard, &offset2 );
|
||||
state Future<Key> fBegin = req.begin.isFirstGreaterOrEqual() ? Future<Key>(req.begin.getKey()) : findKey( data, req.begin, version, shard, &offset1, span->context );
|
||||
state Future<Key> fEnd = req.end.isFirstGreaterOrEqual() ? Future<Key>(req.end.getKey()) : findKey( data, req.end, version, shard, &offset2, span->context );
|
||||
state Key begin = wait(fBegin);
|
||||
state Key end = wait(fEnd);
|
||||
if( req.debugID.present() )
|
||||
|
@ -1514,7 +1535,7 @@ ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
|
|||
} else {
|
||||
state int remainingLimitBytes = req.limitBytes;
|
||||
|
||||
GetKeyValuesReply _r = wait( readRange(data, version, KeyRangeRef(begin, end), req.limit, &remainingLimitBytes) );
|
||||
GetKeyValuesReply _r = wait( readRange(data, version, KeyRangeRef(begin, end), req.limit, &remainingLimitBytes, span) );
|
||||
GetKeyValuesReply r = _r;
|
||||
|
||||
if( req.debugID.present() )
|
||||
|
@ -1576,7 +1597,7 @@ ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> getKeyQ( StorageServer* data, GetKeyRequest req ) {
|
||||
ACTOR Future<Void> getKeyQ( StorageServer* data, GetKeyRequest req, Span span ) {
|
||||
state int64_t resultSize = 0;
|
||||
|
||||
++data->counters.getKeyQueries;
|
||||
|
@ -1589,12 +1610,12 @@ ACTOR Future<Void> getKeyQ( StorageServer* data, GetKeyRequest req ) {
|
|||
wait( delay(0, TaskPriority::DefaultEndpoint) );
|
||||
|
||||
try {
|
||||
state Version version = wait( waitForVersion( data, req.version ) );
|
||||
state Version version = wait( waitForVersion( data, req.version, req.spanContext ) );
|
||||
state uint64_t changeCounter = data->shardChangeCounter;
|
||||
state KeyRange shard = getShardKeyRange( data, req.sel );
|
||||
|
||||
state int offset;
|
||||
Key k = wait( findKey( data, req.sel, version, shard, &offset ) );
|
||||
Key k = wait( findKey( data, req.sel, version, shard, &offset, req.spanContext ) );
|
||||
|
||||
data->checkChangeCounter( changeCounter, KeyRangeRef( std::min<KeyRef>(req.sel.getKey(), k), std::max<KeyRef>(req.sel.getKey(), k) ) );
|
||||
|
||||
|
@ -1610,8 +1631,14 @@ ACTOR Future<Void> getKeyQ( StorageServer* data, GetKeyRequest req ) {
|
|||
data->counters.bytesQueried += resultSize;
|
||||
++data->counters.rowsQueried;
|
||||
|
||||
GetKeyReply reply(updated);
|
||||
// Check if the desired key might be cached
|
||||
auto cached = data->cachedRangeMap[k];
|
||||
//if (cached)
|
||||
// TraceEvent(SevDebug, "SSGetKeyCached").detail("Key", k).detail("Begin", shard.begin.printable()).detail("End", shard.end.printable());
|
||||
|
||||
GetKeyReply reply(updated, cached);
|
||||
reply.penalty = data->getPenalty();
|
||||
|
||||
req.reply.send(reply);
|
||||
}
|
||||
catch (Error& e) {
|
||||
|
@ -2584,7 +2611,6 @@ public:
|
|||
if ((m.type == MutationRef::SetValue) && m.param1.substr(1).startsWith(storageCachePrefix))
|
||||
applyPrivateCacheData( data, m);
|
||||
else {
|
||||
//TraceEvent("PrivateData", data->thisServerID).detail("Mutation", m.toString()).detail("Version", ver);
|
||||
applyPrivateData( data, m );
|
||||
}
|
||||
} else {
|
||||
|
@ -2673,7 +2699,7 @@ private:
|
|||
}
|
||||
|
||||
void applyPrivateCacheData( StorageServer* data, MutationRef const& m ) {
|
||||
TraceEvent(SevDebug, "SSPrivateCacheMutation", data->thisServerID).detail("Mutation", m.toString());
|
||||
//TraceEvent(SevDebug, "SSPrivateCacheMutation", data->thisServerID).detail("Mutation", m.toString());
|
||||
|
||||
if (processedCacheStartKey) {
|
||||
// Because of the implementation of the krm* functions, we expect changes in pairs, [begin,end)
|
||||
|
@ -2681,17 +2707,16 @@ private:
|
|||
KeyRangeRef keys( cacheStartKey.removePrefix(systemKeys.begin).removePrefix( storageCachePrefix ),
|
||||
m.param1.removePrefix(systemKeys.begin).removePrefix( storageCachePrefix ));
|
||||
data->cachedRangeMap.insert(keys, true);
|
||||
//TraceEvent(SevDebug, "SSPrivateCacheMutation", data->thisServerID).detail("Begin", keys.begin).detail("End", keys.end);
|
||||
//fprintf(stderr, "applyPrivateCacheData : begin: %s, end: %s\n", printable(keys.begin).c_str(), printable(keys.end).c_str());
|
||||
|
||||
//Figure out the affected shard ranges and maintain the cached key-range information in the in-memory map
|
||||
// TODO revisit- we are not splitting the cached ranges based on shards as of now.
|
||||
if (0) {
|
||||
auto cachedRanges = data->shards.intersectingRanges(keys);
|
||||
for(auto shard = cachedRanges.begin(); shard != cachedRanges.end(); ++shard) {
|
||||
KeyRangeRef intersectingRange = shard.range() & keys;
|
||||
data->cachedRangeMap.insert(KeyRangeRef(intersectingRange.begin, intersectingRange.end), true);
|
||||
}
|
||||
auto cachedRanges = data->shards.intersectingRanges(keys);
|
||||
for(auto shard = cachedRanges.begin(); shard != cachedRanges.end(); ++shard) {
|
||||
KeyRangeRef intersectingRange = shard.range() & keys;
|
||||
TraceEvent(SevDebug, "SSPrivateCacheMutationInsertUnexpected", data->thisServerID).detail("Begin", intersectingRange.begin).detail("End", intersectingRange.end);
|
||||
data->cachedRangeMap.insert(intersectingRange, true);
|
||||
}
|
||||
}
|
||||
processedStartKey = false;
|
||||
} else if ((m.type == MutationRef::SetValue) && m.param1.substr(1).startsWith(storageCachePrefix)) {
|
||||
|
@ -2728,7 +2753,6 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
|||
}
|
||||
|
||||
state Reference<ILogSystem::IPeekCursor> cursor = data->logCursor;
|
||||
//TraceEvent("SSUpdatePeeking", data->thisServerID).detail("MyVer", data->version.get()).detail("Epoch", data->updateEpoch).detail("Seq", data->updateSequence);
|
||||
|
||||
loop {
|
||||
wait( cursor->getMore() );
|
||||
|
@ -2775,12 +2799,14 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
|||
if (LogProtocolMessage::isNextIn(cloneReader)) {
|
||||
LogProtocolMessage lpm;
|
||||
cloneReader >> lpm;
|
||||
//TraceEvent(SevDebug, "SSReadingLPM", data->thisServerID).detail("Mutation", lpm.toString());
|
||||
dbgLastMessageWasProtocol = true;
|
||||
cloneCursor1->setProtocolVersion(cloneReader.protocolVersion());
|
||||
}
|
||||
else {
|
||||
MutationRef msg;
|
||||
cloneReader >> msg;
|
||||
//TraceEvent(SevDebug, "SSReadingLog", data->thisServerID).detail("Mutation", msg.toString());
|
||||
|
||||
if (firstMutation && msg.param1.startsWith(systemKeys.end))
|
||||
hasPrivateData = true;
|
||||
|
@ -2844,7 +2870,6 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
|||
|
||||
state Version ver = invalidVersion;
|
||||
cloneCursor2->setProtocolVersion(data->logProtocol);
|
||||
//TraceEvent("SSUpdatePeeked", data->thisServerID).detail("FromEpoch", data->updateEpoch).detail("FromSeq", data->updateSequence).detail("ToEpoch", results.end_epoch).detail("ToSeq", results.end_seq).detail("MsgSize", results.messages.size());
|
||||
for (;cloneCursor2->hasMessage(); cloneCursor2->nextMessage()) {
|
||||
if(mutationBytes > SERVER_KNOBS->DESIRED_UPDATE_BYTES) {
|
||||
mutationBytes = 0;
|
||||
|
@ -3651,6 +3676,7 @@ ACTOR Future<Void> checkBehind( StorageServer* self ) {
|
|||
ACTOR Future<Void> serveGetValueRequests( StorageServer* self, FutureStream<GetValueRequest> getValue ) {
|
||||
loop {
|
||||
GetValueRequest req = waitNext(getValue);
|
||||
Span span("SS:getValue"_loc, { req.spanContext });
|
||||
// Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade before doing real work
|
||||
if( req.debugID.present() )
|
||||
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "storageServer.received"); //.detail("TaskID", g_network->getCurrentTask());
|
||||
|
@ -3658,32 +3684,35 @@ ACTOR Future<Void> serveGetValueRequests( StorageServer* self, FutureStream<GetV
|
|||
if (SHORT_CIRCUT_ACTUAL_STORAGE && normalKeys.contains(req.key))
|
||||
req.reply.send(GetValueReply());
|
||||
else
|
||||
self->actors.add(self->readGuard(req , getValueQ));
|
||||
self->actors.add(self->readGuard(span, req , getValueQ));
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> serveGetKeyValuesRequests( StorageServer* self, FutureStream<GetKeyValuesRequest> getKeyValues ) {
|
||||
loop {
|
||||
GetKeyValuesRequest req = waitNext(getKeyValues);
|
||||
Span span("SS:getKeyValues"_loc, { req.spanContext });
|
||||
// Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade before doing real work
|
||||
self->actors.add(self->readGuard(req, getKeyValuesQ));
|
||||
self->actors.add(self->readGuard(span, req, getKeyValuesQ));
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> serveGetKeyRequests( StorageServer* self, FutureStream<GetKeyRequest> getKey ) {
|
||||
loop {
|
||||
GetKeyRequest req = waitNext(getKey);
|
||||
Span span("SS:getKey"_loc, { req.spanContext });
|
||||
// Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade before doing real work
|
||||
self->actors.add(self->readGuard(req , getKeyQ));
|
||||
self->actors.add(self->readGuard(span, req , getKeyQ));
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> serveWatchValueRequests( StorageServer* self, FutureStream<WatchValueRequest> watchValue ) {
|
||||
loop {
|
||||
WatchValueRequest req = waitNext(watchValue);
|
||||
Span span("SS:watchValue"_loc, { req.spanContext });
|
||||
// TODO: fast load balancing?
|
||||
// SOMEDAY: combine watches for the same key/value into a single watch
|
||||
self->actors.add(self->readGuard(req, watchValueQ));
|
||||
self->actors.add(self->readGuard(span, req, watchValueQ));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include <tuple>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
#include "fdbrpc/Locality.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/SystemMonitor.h"
|
||||
|
@ -453,7 +455,7 @@ ACTOR Future<Void> registrationClient(
|
|||
state Future<Void> cacheErrorsFuture;
|
||||
state Optional<double> incorrectTime;
|
||||
loop {
|
||||
RegisterWorkerRequest request(interf, initialClass, processClass, asyncPriorityInfo->get(), requestGeneration++, ddInterf->get(), rkInterf->get(), scInterf->get(), degraded->get());
|
||||
RegisterWorkerRequest request(interf, initialClass, processClass, asyncPriorityInfo->get(), requestGeneration++, ddInterf->get(), rkInterf->get(), degraded->get());
|
||||
for (auto const& i : issues->get()) {
|
||||
request.issues.push_back_deep(request.issues.arena(), i);
|
||||
}
|
||||
|
@ -491,41 +493,10 @@ ACTOR Future<Void> registrationClient(
|
|||
when ( RegisterWorkerReply reply = wait( registrationReply )) {
|
||||
processClass = reply.processClass;
|
||||
asyncPriorityInfo->set( reply.priorityInfo );
|
||||
|
||||
if(!reply.storageCache.present()) {
|
||||
cacheProcessFuture.cancel();
|
||||
scInterf->set(Optional<std::pair<uint16_t,StorageServerInterface>>());
|
||||
} else if (!scInterf->get().present() || scInterf->get().get().first != reply.storageCache.get()) {
|
||||
StorageServerInterface recruited;
|
||||
recruited.locality = locality;
|
||||
recruited.initEndpoints();
|
||||
|
||||
std::map<std::string, std::string> details;
|
||||
startRole( Role::STORAGE_CACHE, recruited.id(), interf.id(), details );
|
||||
|
||||
//DUMPTOKEN(recruited.getVersion);
|
||||
DUMPTOKEN(recruited.getValue);
|
||||
DUMPTOKEN(recruited.getKey);
|
||||
DUMPTOKEN(recruited.getKeyValues);
|
||||
DUMPTOKEN(recruited.getShardState);
|
||||
DUMPTOKEN(recruited.waitMetrics);
|
||||
DUMPTOKEN(recruited.splitMetrics);
|
||||
DUMPTOKEN(recruited.getReadHotRanges);
|
||||
DUMPTOKEN(recruited.getStorageMetrics);
|
||||
DUMPTOKEN(recruited.waitFailure);
|
||||
DUMPTOKEN(recruited.getQueuingMetrics);
|
||||
DUMPTOKEN(recruited.getKeyValueStoreType);
|
||||
DUMPTOKEN(recruited.watchValue);
|
||||
|
||||
cacheProcessFuture = storageCache( recruited, reply.storageCache.get(), dbInfo );
|
||||
cacheErrorsFuture = forwardError(errors, Role::STORAGE_CACHE, recruited.id(), setWhenDoneOrError(cacheProcessFuture, scInterf, Optional<std::pair<uint16_t,StorageServerInterface>>()));
|
||||
scInterf->set(std::make_pair(reply.storageCache.get(), recruited));
|
||||
}
|
||||
}
|
||||
when ( wait( ccInterface->onChange() )) {}
|
||||
when ( wait( ddInterf->onChange() ) ) {}
|
||||
when ( wait( rkInterf->onChange() ) ) {}
|
||||
when ( wait( scInterf->onChange() ) ) {}
|
||||
when ( wait( degraded->onChange() ) ) {}
|
||||
when ( wait( FlowTransport::transport().onIncompatibleChanged() ) ) {}
|
||||
when ( wait( issues->onChange() ) ) {}
|
||||
|
@ -712,6 +683,41 @@ ACTOR Future<Void> storageServerRollbackRebooter( Future<Void> prevStorageServer
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> storageCacheRollbackRebooter( Future<Void> prevStorageCache, UID id, LocalityData locality, Reference<AsyncVar<ServerDBInfo>> db) {
|
||||
loop {
|
||||
ErrorOr<Void> e = wait( errorOr( prevStorageCache) );
|
||||
if (!e.isError()) {
|
||||
TraceEvent("StorageCacheRequestedReboot1", id);
|
||||
return Void();
|
||||
}
|
||||
else if (e.getError().code() != error_code_please_reboot && e.getError().code() != error_code_worker_removed) {
|
||||
TraceEvent("StorageCacheRequestedReboot2", id).detail("Code",e.getError().code());
|
||||
throw e.getError();
|
||||
}
|
||||
|
||||
TraceEvent("StorageCacheRequestedReboot", id);
|
||||
|
||||
StorageServerInterface recruited;
|
||||
recruited.uniqueID = deterministicRandom()->randomUniqueID();// id;
|
||||
recruited.locality = locality;
|
||||
recruited.initEndpoints();
|
||||
|
||||
DUMPTOKEN(recruited.getValue);
|
||||
DUMPTOKEN(recruited.getKey);
|
||||
DUMPTOKEN(recruited.getKeyValues);
|
||||
DUMPTOKEN(recruited.getShardState);
|
||||
DUMPTOKEN(recruited.waitMetrics);
|
||||
DUMPTOKEN(recruited.splitMetrics);
|
||||
DUMPTOKEN(recruited.getStorageMetrics);
|
||||
DUMPTOKEN(recruited.waitFailure);
|
||||
DUMPTOKEN(recruited.getQueuingMetrics);
|
||||
DUMPTOKEN(recruited.getKeyValueStoreType);
|
||||
DUMPTOKEN(recruited.watchValue);
|
||||
|
||||
prevStorageCache = storageCacheServer(recruited, 0, db);
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: This will not work correctly in simulation as all workers would share the same roles map
|
||||
std::set<std::pair<std::string, std::string>> g_roles;
|
||||
|
||||
|
@ -1049,10 +1055,40 @@ ACTOR Future<Void> workerServer(
|
|||
}
|
||||
}
|
||||
|
||||
bool hasCache = false;
|
||||
// start cache role if we have the right process class
|
||||
if (initialClass.classType() == ProcessClass::StorageCacheClass) {
|
||||
hasCache = true;
|
||||
StorageServerInterface recruited;
|
||||
recruited.locality = locality;
|
||||
recruited.initEndpoints();
|
||||
|
||||
std::map<std::string, std::string> details;
|
||||
startRole(Role::STORAGE_CACHE, recruited.id(), interf.id(), details);
|
||||
|
||||
// DUMPTOKEN(recruited.getVersion);
|
||||
DUMPTOKEN(recruited.getValue);
|
||||
DUMPTOKEN(recruited.getKey);
|
||||
DUMPTOKEN(recruited.getKeyValues);
|
||||
DUMPTOKEN(recruited.getShardState);
|
||||
DUMPTOKEN(recruited.waitMetrics);
|
||||
DUMPTOKEN(recruited.splitMetrics);
|
||||
DUMPTOKEN(recruited.getStorageMetrics);
|
||||
DUMPTOKEN(recruited.waitFailure);
|
||||
DUMPTOKEN(recruited.getQueuingMetrics);
|
||||
DUMPTOKEN(recruited.getKeyValueStoreType);
|
||||
DUMPTOKEN(recruited.watchValue);
|
||||
|
||||
auto f = storageCacheServer(recruited, 0, dbInfo);
|
||||
f = storageCacheRollbackRebooter( f, recruited.id(), recruited.locality, dbInfo);
|
||||
errorForwarders.add(forwardError(errors, Role::STORAGE_CACHE, recruited.id(), f));
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> details;
|
||||
details["Locality"] = locality.toString();
|
||||
details["DataFolder"] = folder;
|
||||
details["StoresPresent"] = format("%d", stores.size());
|
||||
details["CachePresent"] = hasCache ? "true" : "false";
|
||||
startRole( Role::WORKER, interf.id(), interf.id(), details );
|
||||
errorForwarders.add(traceRole(Role::WORKER, interf.id()));
|
||||
|
||||
|
@ -1350,7 +1386,7 @@ ACTOR Future<Void> workerServer(
|
|||
DUMPTOKEN( recruited.getQueuingMetrics );
|
||||
DUMPTOKEN( recruited.confirmRunning );
|
||||
|
||||
errorForwarders.add( zombie(recruited, forwardError( errors, Role::LOG_ROUTER, recruited.id(),
|
||||
errorForwarders.add( zombie(recruited, forwardError( errors, Role::LOG_ROUTER, recruited.id(),
|
||||
logRouter( recruited, req, dbInfo ) ) ) );
|
||||
req.reply.send(recruited);
|
||||
}
|
||||
|
@ -1672,7 +1708,7 @@ ACTOR Future<Void> monitorLeaderRemotelyWithDelayedCandidacy( Reference<ClusterC
|
|||
if(currentCC->get().present() && dbInfo->get().clusterInterface == currentCC->get().get() && IFailureMonitor::failureMonitor().getState( currentCC->get().get().registerWorker.getEndpoint() ).isAvailable()) {
|
||||
timeout = Future<Void>();
|
||||
} else if(!timeout.isValid()) {
|
||||
timeout = delay( SERVER_KNOBS->MIN_DELAY_STORAGE_CANDIDACY_SECONDS + (deterministicRandom()->random01()*(SERVER_KNOBS->MAX_DELAY_STORAGE_CANDIDACY_SECONDS-SERVER_KNOBS->MIN_DELAY_STORAGE_CANDIDACY_SECONDS)) );
|
||||
timeout = delay( SERVER_KNOBS->MIN_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS + (deterministicRandom()->random01()*(SERVER_KNOBS->MAX_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS-SERVER_KNOBS->MIN_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS)) );
|
||||
}
|
||||
choose {
|
||||
when( wait(currentCC->onChange()) ) {}
|
||||
|
@ -1729,9 +1765,9 @@ ACTOR Future<Void> fdbd(
|
|||
Reference<AsyncVar<ServerDBInfo>> dbInfo( new AsyncVar<ServerDBInfo>(ServerDBInfo()) );
|
||||
|
||||
actors.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo"));
|
||||
if (processClass == ProcessClass::TesterClass) {
|
||||
if (processClass.machineClassFitness(ProcessClass::ClusterController) == ProcessClass::NeverAssign) {
|
||||
actors.push_back( reportErrors( monitorLeader( connFile, cc ), "ClusterController" ) );
|
||||
} else if (processClass == ProcessClass::StorageClass && SERVER_KNOBS->MAX_DELAY_STORAGE_CANDIDACY_SECONDS > 0) {
|
||||
} else if (processClass.machineClassFitness(ProcessClass::ClusterController) == ProcessClass::WorstFit && SERVER_KNOBS->MAX_DELAY_CC_WORST_FIT_CANDIDACY_SECONDS > 0) {
|
||||
actors.push_back( reportErrors( monitorLeaderRemotelyWithDelayedCandidacy( connFile, cc, asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities, dbInfo ), "ClusterController" ) );
|
||||
} else {
|
||||
actors.push_back( reportErrors( clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") );
|
||||
|
|
|
@ -507,9 +507,9 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
// Wait for parallel restore to finish before we can proceed
|
||||
TraceEvent("FastRestore").detail("BackupAndParallelRestore", "WaitForRestoreToFinish");
|
||||
TraceEvent("FastRestoreWorkload").detail("WaitForRestoreToFinish", randomID);
|
||||
wait(backupAgent.parallelRestoreFinish(cx, randomID));
|
||||
TraceEvent("FastRestore").detail("BackupAndParallelRestore", "RestoreFinished");
|
||||
TraceEvent("FastRestoreWorkload").detail("RestoreFinished", randomID);
|
||||
|
||||
for (auto& restore : restores) {
|
||||
ASSERT(!restore.isError());
|
||||
|
@ -668,7 +668,7 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
g_simulator.backupAgents = ISimulator::NoBackupAgents;
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "BackupAndRestoreCorrectness").error(e).GetLastError();
|
||||
TraceEvent(SevError, "BackupAndParallelRestoreCorrectness").error(e).GetLastError();
|
||||
throw;
|
||||
}
|
||||
return Void();
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbserver/TesterInterface.actor.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
struct CacheWorkload : TestWorkload {
|
||||
Key keyPrefix;
|
||||
|
||||
CacheWorkload(WorkloadContext const& wcx)
|
||||
: TestWorkload(wcx)
|
||||
{
|
||||
keyPrefix = unprintable( getOption(options, LiteralStringRef("keyPrefix"), LiteralStringRef("")).toString() );
|
||||
}
|
||||
|
||||
virtual std::string description() { return "CacheWorkload"; }
|
||||
virtual Future<Void> setup( Database const& cx ) {
|
||||
if (clientId == 0) {
|
||||
//Call management API to cache keys under the given prefix
|
||||
return addCachedRange(cx, prefixRange(keyPrefix));
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
virtual Future<Void> start( Database const& cx ) {
|
||||
return Void();
|
||||
}
|
||||
virtual Future<bool> check( Database const& cx ) {
|
||||
return true;
|
||||
}
|
||||
virtual void getMetrics( vector<PerfMetric>& m ) {
|
||||
}
|
||||
};
|
||||
|
||||
WorkloadFactory<CacheWorkload> CacheWorkloadFactory("Cache");
|
|
@ -21,6 +21,7 @@
|
|||
#include <math.h>
|
||||
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/Tracing.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbserver/TesterInterface.actor.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
|
@ -376,12 +377,16 @@ struct ConsistencyCheckWorkload : TestWorkload
|
|||
state Key begin = keyServersKeys.begin;
|
||||
state Key end = keyServersKeys.end;
|
||||
state int limitKeyServers = BUGGIFY ? 1 : 100;
|
||||
state Span span(deterministicRandom()->randomUniqueID(), "WL:ConsistencyCheck"_loc);
|
||||
|
||||
while (begin < end) {
|
||||
state Reference<ProxyInfo> proxyInfo = wait(cx->getMasterProxiesFuture(false));
|
||||
keyServerLocationFutures.clear();
|
||||
for (int i = 0; i < proxyInfo->size(); i++)
|
||||
keyServerLocationFutures.push_back(proxyInfo->get(i, &MasterProxyInterface::getKeyServersLocations).getReplyUnlessFailedFor(GetKeyServerLocationsRequest(begin, end, limitKeyServers, false, Arena()), 2, 0));
|
||||
keyServerLocationFutures.push_back(
|
||||
proxyInfo->get(i, &MasterProxyInterface::getKeyServersLocations)
|
||||
.getReplyUnlessFailedFor(
|
||||
GetKeyServerLocationsRequest(span->context, begin, end, limitKeyServers, false, Arena()), 2, 0));
|
||||
|
||||
state bool keyServersInsertedForThisIteration = false;
|
||||
choose {
|
||||
|
@ -708,6 +713,7 @@ struct ConsistencyCheckWorkload : TestWorkload
|
|||
state vector<UID> storageServers = (isRelocating) ? destStorageServers : sourceStorageServers;
|
||||
state vector<StorageServerInterface> storageServerInterfaces;
|
||||
|
||||
//TraceEvent("ConsistencyCheck_GetStorageInfo").detail("StorageServers", storageServers.size());
|
||||
loop {
|
||||
try {
|
||||
vector< Future< Optional<Value> > > serverListEntries;
|
||||
|
@ -720,6 +726,7 @@ struct ConsistencyCheckWorkload : TestWorkload
|
|||
else if (self->performQuiescentChecks)
|
||||
self->testFailure("/FF/serverList changing in a quiescent database");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
catch(Error &e) {
|
||||
|
@ -917,7 +924,7 @@ struct ConsistencyCheckWorkload : TestWorkload
|
|||
else if(!isRelocating)
|
||||
{
|
||||
TraceEvent("ConsistencyCheck_StorageServerUnavailable").suppressFor(1.0).detail("StorageServer", storageServers[j]).detail("ShardBegin", printable(range.begin)).detail("ShardEnd", printable(range.end))
|
||||
.detail("Address", storageServerInterfaces[j].address()).detail("GetKeyValuesToken", storageServerInterfaces[j].getKeyValues.getEndpoint().token);
|
||||
.detail("Address", storageServerInterfaces[j].address()).detail("UID", storageServerInterfaces[j].id()).detail("GetKeyValuesToken", storageServerInterfaces[j].getKeyValues.getEndpoint().token);
|
||||
|
||||
//All shards should be available in quiscence
|
||||
if(self->performQuiescentChecks)
|
||||
|
|
|
@ -18,15 +18,21 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbserver/TesterInterface.actor.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbserver/workloads/BulkSetup.actor.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/Trace.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
#include "flow/serialize.h"
|
||||
#include <cstring>
|
||||
|
||||
struct CycleWorkload : TestWorkload {
|
||||
int actorCount, nodeCount;
|
||||
double testDuration, transactionsPerSecond, minExpectedTransactionsPerSecond;
|
||||
double testDuration, transactionsPerSecond, minExpectedTransactionsPerSecond, traceParentProbability;
|
||||
Key keyPrefix;
|
||||
|
||||
vector<Future<Void>> clients;
|
||||
|
@ -38,12 +44,13 @@ struct CycleWorkload : TestWorkload {
|
|||
transactions("Transactions"), retries("Retries"), totalLatency("Latency"),
|
||||
tooOldRetries("Retries.too_old"), commitFailedRetries("Retries.commit_failed")
|
||||
{
|
||||
testDuration = getOption( options, LiteralStringRef("testDuration"), 10.0 );
|
||||
transactionsPerSecond = getOption( options, LiteralStringRef("transactionsPerSecond"), 5000.0 ) / clientCount;
|
||||
actorCount = getOption( options, LiteralStringRef("actorsPerClient"), transactionsPerSecond / 5 );
|
||||
nodeCount = getOption(options, LiteralStringRef("nodeCount"), transactionsPerSecond * clientCount);
|
||||
keyPrefix = unprintable( getOption(options, LiteralStringRef("keyPrefix"), LiteralStringRef("")).toString() );
|
||||
minExpectedTransactionsPerSecond = transactionsPerSecond * getOption(options, LiteralStringRef("expectedRate"), 0.7);
|
||||
testDuration = getOption( options, "testDuration"_sr, 10.0 );
|
||||
transactionsPerSecond = getOption( options, "transactionsPerSecond"_sr, 5000.0 ) / clientCount;
|
||||
actorCount = getOption( options, "actorsPerClient"_sr, transactionsPerSecond / 5 );
|
||||
nodeCount = getOption(options, "nodeCount"_sr, transactionsPerSecond * clientCount);
|
||||
keyPrefix = unprintable( getOption(options, "keyPrefix"_sr, LiteralStringRef("")).toString() );
|
||||
traceParentProbability = getOption(options, "traceParentProbability "_sr, 0.01);
|
||||
minExpectedTransactionsPerSecond = transactionsPerSecond * getOption(options, "expectedRate"_sr, 0.7);
|
||||
}
|
||||
|
||||
virtual std::string description() { return "CycleWorkload"; }
|
||||
|
@ -98,6 +105,12 @@ struct CycleWorkload : TestWorkload {
|
|||
state double tstart = now();
|
||||
state int r = deterministicRandom()->randomInt(0, self->nodeCount);
|
||||
state Transaction tr(cx);
|
||||
if (deterministicRandom()->random01() >= self->traceParentProbability) {
|
||||
state Span span("CycleClient"_loc);
|
||||
TraceEvent("CycleTracingTransaction", span->context);
|
||||
tr.setOption(FDBTransactionOptions::SPAN_PARENT,
|
||||
BinaryWriter::toValue(span->context, Unversioned()));
|
||||
}
|
||||
while (true) {
|
||||
try {
|
||||
// Reverse next and next^2 node
|
||||
|
@ -115,9 +128,9 @@ struct CycleWorkload : TestWorkload {
|
|||
tr.set( self->key(r), self->value(r3) );
|
||||
tr.set( self->key(r2), self->value(r4) );
|
||||
tr.set( self->key(r3), self->value(r2) );
|
||||
// TraceEvent("CyclicTestMX").detail("Key", self->key(r).toString()).detail("Value", self->value(r3).toString());
|
||||
// TraceEvent("CyclicTestMX").detail("Key", self->key(r2).toString()).detail("Value", self->value(r4).toString());
|
||||
// TraceEvent("CyclicTestMX").detail("Key", self->key(r3).toString()).detail("Value", self->value(r2).toString());
|
||||
//TraceEvent("CyclicTestMX1").detail("Key", self->key(r).toString()).detail("Value", self->value(r3).toString());
|
||||
//TraceEvent("CyclicTestMX2").detail("Key", self->key(r2).toString()).detail("Value", self->value(r4).toString());
|
||||
//TraceEvent("CyclicTestMX3").detail("Key", self->key(r3).toString()).detail("Value", self->value(r2).toString());
|
||||
|
||||
wait( tr.commit() );
|
||||
// TraceEvent("CycleCommit");
|
||||
|
@ -161,7 +174,10 @@ struct CycleWorkload : TestWorkload {
|
|||
return false;
|
||||
}
|
||||
int i=0;
|
||||
for(int c=0; c<nodeCount; c++) {
|
||||
int iPrev=0;
|
||||
double d;
|
||||
int c;
|
||||
for(c=0; c<nodeCount; c++) {
|
||||
if (c && !i) {
|
||||
TraceEvent(SevError, "TestFailure").detail("Reason", "Cycle got shorter").detail("Before", nodeCount).detail("After", c).detail("KeyPrefix", keyPrefix.printable());
|
||||
logTestData(data);
|
||||
|
@ -172,7 +188,8 @@ struct CycleWorkload : TestWorkload {
|
|||
logTestData(data);
|
||||
return false;
|
||||
}
|
||||
double d = testKeyToDouble(data[i].value, keyPrefix);
|
||||
d = testKeyToDouble(data[i].value, keyPrefix);
|
||||
iPrev = i;
|
||||
i = (int)d;
|
||||
if ( i != d || i<0 || i>=nodeCount) {
|
||||
TraceEvent(SevError, "TestFailure").detail("Reason", "Invalid value").detail("KeyPrefix", keyPrefix.printable());
|
||||
|
@ -181,7 +198,8 @@ struct CycleWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
if (i != 0) {
|
||||
TraceEvent(SevError, "TestFailure").detail("Reason", "Cycle got longer").detail("KeyPrefix", keyPrefix.printable());
|
||||
TraceEvent(SevError, "TestFailure").detail("Reason", "Cycle got longer").detail("KeyPrefix", keyPrefix.printable()).detail("Key", key(i)).detail("Value", data[i].value).
|
||||
detail("Iteration", c).detail("Nodecount", nodeCount).detail("Int", i).detail("Double", d).detail("ValuePrev", data[iPrev].value).detail("KeyPrev", data[iPrev].key);
|
||||
logTestData(data);
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -59,7 +59,9 @@ struct ExceptionContract {
|
|||
e.code() == error_code_transaction_cancelled ||
|
||||
e.code() == error_code_key_too_large ||
|
||||
e.code() == error_code_value_too_large ||
|
||||
e.code() == error_code_process_behind)
|
||||
e.code() == error_code_process_behind ||
|
||||
e.code() == error_code_batch_transaction_throttled ||
|
||||
e.code() == error_code_tag_throttled)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -226,15 +226,21 @@ struct ReadWriteWorkload : KVWorkload {
|
|||
ACTOR static Future<bool> traceDumpWorkers( Reference<AsyncVar<ServerDBInfo>> db ) {
|
||||
try {
|
||||
loop {
|
||||
ErrorOr<std::vector<WorkerDetails>> workerList = wait( db->get().clusterInterface.getWorkers.tryGetReply( GetWorkersRequest() ) );
|
||||
if( workerList.present() ) {
|
||||
std::vector<Future<ErrorOr<Void>>> dumpRequests;
|
||||
for( int i = 0; i < workerList.get().size(); i++)
|
||||
dumpRequests.push_back( workerList.get()[i].interf.traceBatchDumpRequest.tryGetReply( TraceBatchDumpRequest() ) );
|
||||
wait( waitForAll( dumpRequests ) );
|
||||
return true;
|
||||
choose {
|
||||
when( wait( db->onChange() ) ) {}
|
||||
|
||||
when (ErrorOr<std::vector<WorkerDetails>> workerList = wait( db->get().clusterInterface.getWorkers.tryGetReply( GetWorkersRequest() ) );)
|
||||
{
|
||||
if( workerList.present() ) {
|
||||
std::vector<Future<ErrorOr<Void>>> dumpRequests;
|
||||
for( int i = 0; i < workerList.get().size(); i++)
|
||||
dumpRequests.push_back( workerList.get()[i].interf.traceBatchDumpRequest.tryGetReply( TraceBatchDumpRequest() ) );
|
||||
wait( waitForAll( dumpRequests ) );
|
||||
return true;
|
||||
}
|
||||
wait( delay( 1.0 ) );
|
||||
}
|
||||
}
|
||||
wait( delay( 1.0 ) );
|
||||
}
|
||||
} catch( Error &e ) {
|
||||
TraceEvent(SevError, "FailedToDumpWorkers").error(e);
|
||||
|
|
|
@ -26,6 +26,8 @@ void forceLinkIndexedSetTests();
|
|||
void forceLinkDequeTests();
|
||||
void forceLinkFlowTests();
|
||||
void forceLinkVersionedMapTests();
|
||||
void forceLinkMemcpyTests();
|
||||
void forceLinkMemcpyPerfTests();
|
||||
|
||||
struct UnitTestWorkload : TestWorkload {
|
||||
bool enabled;
|
||||
|
@ -45,6 +47,8 @@ struct UnitTestWorkload : TestWorkload {
|
|||
forceLinkDequeTests();
|
||||
forceLinkFlowTests();
|
||||
forceLinkVersionedMapTests();
|
||||
forceLinkMemcpyTests();
|
||||
forceLinkMemcpyPerfTests();
|
||||
}
|
||||
|
||||
virtual std::string description() { return "UnitTests"; }
|
||||
|
|
61
flow/Arena.h
61
flow/Arena.h
|
@ -95,9 +95,9 @@ public:
|
|||
inline explicit Arena( size_t reservedSize );
|
||||
//~Arena();
|
||||
Arena(const Arena&);
|
||||
Arena(Arena && r) BOOST_NOEXCEPT;
|
||||
Arena(Arena&& r) noexcept;
|
||||
Arena& operator=(const Arena&);
|
||||
Arena& operator=(Arena&&) BOOST_NOEXCEPT;
|
||||
Arena& operator=(Arena&&) noexcept;
|
||||
|
||||
inline void dependsOn( const Arena& p );
|
||||
inline size_t getSize() const;
|
||||
|
@ -173,12 +173,12 @@ inline Arena::Arena(size_t reservedSize) : impl( 0 ) {
|
|||
ArenaBlock::create((int)reservedSize,impl);
|
||||
}
|
||||
inline Arena::Arena( const Arena& r ) : impl( r.impl ) {}
|
||||
inline Arena::Arena(Arena && r) BOOST_NOEXCEPT : impl(std::move(r.impl)) {}
|
||||
inline Arena::Arena(Arena&& r) noexcept : impl(std::move(r.impl)) {}
|
||||
inline Arena& Arena::operator=(const Arena& r) {
|
||||
impl = r.impl;
|
||||
return *this;
|
||||
}
|
||||
inline Arena& Arena::operator=(Arena&& r) BOOST_NOEXCEPT {
|
||||
inline Arena& Arena::operator=(Arena&& r) noexcept {
|
||||
impl = std::move(r.impl);
|
||||
return *this;
|
||||
}
|
||||
|
@ -380,12 +380,11 @@ public:
|
|||
}
|
||||
#else
|
||||
Standalone( const T& t, const Arena& arena ) : Arena( arena ), T( t ) {}
|
||||
Standalone( const Standalone<T> & t ) : Arena((Arena const&)t), T((T const&)t) {}
|
||||
Standalone<T>& operator=( const Standalone<T> & t ) {
|
||||
*(Arena*)this = (Arena const&)t;
|
||||
*(T*)this = (T const&)t;
|
||||
return *this;
|
||||
}
|
||||
Standalone(const Standalone<T>&) = default;
|
||||
Standalone<T>& operator=(const Standalone<T>&) = default;
|
||||
Standalone(Standalone<T>&&) = default;
|
||||
Standalone<T>& operator=(Standalone<T>&&) = default;
|
||||
~Standalone() = default;
|
||||
#endif
|
||||
|
||||
template <class U> Standalone<U> castTo() const {
|
||||
|
@ -632,6 +631,9 @@ struct Traceable<Standalone<T>> : std::conditional<Traceable<T>::value, std::tru
|
|||
};
|
||||
|
||||
#define LiteralStringRef( str ) StringRef( (const uint8_t*)(str), sizeof((str))-1 )
|
||||
inline StringRef operator "" _sr(const char* str, size_t size) {
|
||||
return StringRef(reinterpret_cast<const uint8_t*>(str), size);
|
||||
}
|
||||
|
||||
// makeString is used to allocate a Standalone<StringRef> of a known length for later
|
||||
// mutation (via mutateString). If you need to append to a string of unknown length,
|
||||
|
@ -710,15 +712,20 @@ inline bool operator != (const StringRef& lhs, const StringRef& rhs ) { return !
|
|||
inline bool operator <= ( const StringRef& lhs, const StringRef& rhs ) { return !(lhs>rhs); }
|
||||
inline bool operator >= ( const StringRef& lhs, const StringRef& rhs ) { return !(lhs<rhs); }
|
||||
|
||||
// This trait is used by VectorRef to determine if it should just memcpy the vector contents.
|
||||
// FIXME: VectorRef really should use std::is_trivially_copyable for this BUT that is not implemented
|
||||
// in gcc c++0x so instead we will use this custom trait which defaults to std::is_trivial, which
|
||||
// handles most situations but others will have to be specialized.
|
||||
// This trait is used by VectorRef to determine if deep copy constructor should recursively
|
||||
// call deep copies of each element.
|
||||
//
|
||||
// TODO: There should be an easier way to identify the difference between flow_ref and non-flow_ref types.
|
||||
// std::is_trivially_copyable does not work because some flow_ref types are trivially copyable
|
||||
// and some non-flow_ref types are not trivially copyable.
|
||||
template <typename T>
|
||||
struct memcpy_able : std::is_trivial<T> {};
|
||||
struct flow_ref : std::integral_constant<bool, !std::is_fundamental_v<T>> {};
|
||||
|
||||
template <>
|
||||
struct memcpy_able<UID> : std::integral_constant<bool, true> {};
|
||||
struct flow_ref<UID> : std::integral_constant<bool, false> {};
|
||||
|
||||
template <class A, class B>
|
||||
struct flow_ref<std::pair<A, B>> : std::integral_constant<bool, false> {};
|
||||
|
||||
template<class T>
|
||||
struct string_serialized_traits : std::false_type {
|
||||
|
@ -794,7 +801,7 @@ public:
|
|||
using value_type = T;
|
||||
static_assert(SerStrategy == VecSerStrategy::FlatBuffers || string_serialized_traits<T>::value);
|
||||
|
||||
// T must be trivially destructible (and copyable)!
|
||||
// T must be trivially destructible!
|
||||
VectorRef() : data(0), m_size(0), m_capacity(0) {}
|
||||
|
||||
template <VecSerStrategy S>
|
||||
|
@ -809,19 +816,19 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Arena constructor for non-Ref types, identified by memcpy_able
|
||||
// Arena constructor for non-Ref types, identified by !flow_ref
|
||||
template <class T2 = T, VecSerStrategy S>
|
||||
VectorRef(Arena& p, const VectorRef<T, S>& toCopy, typename std::enable_if<memcpy_able<T2>::value, int>::type = 0)
|
||||
VectorRef(Arena& p, const VectorRef<T, S>& toCopy, typename std::enable_if<!flow_ref<T2>::value, int>::type = 0)
|
||||
: VPS(toCopy), data((T*)new (p) uint8_t[sizeof(T) * toCopy.size()]), m_size(toCopy.size()),
|
||||
m_capacity(toCopy.size()) {
|
||||
if (m_size > 0) {
|
||||
memcpy(data, toCopy.data, m_size * sizeof(T));
|
||||
std::copy(toCopy.data, toCopy.data + m_size, data);
|
||||
}
|
||||
}
|
||||
|
||||
// Arena constructor for Ref types, which must have an Arena constructor
|
||||
template <class T2 = T, VecSerStrategy S>
|
||||
VectorRef(Arena& p, const VectorRef<T, S>& toCopy, typename std::enable_if<!memcpy_able<T2>::value, int>::type = 0)
|
||||
VectorRef(Arena& p, const VectorRef<T, S>& toCopy, typename std::enable_if<flow_ref<T2>::value, int>::type = 0)
|
||||
: VPS(), data((T*)new (p) uint8_t[sizeof(T) * toCopy.size()]), m_size(toCopy.size()), m_capacity(toCopy.size()) {
|
||||
for (int i = 0; i < m_size; i++) {
|
||||
auto ptr = new (&data[i]) T(p, toCopy[i]);
|
||||
|
@ -917,7 +924,7 @@ public:
|
|||
if (m_size + count > m_capacity) reallocate(p, m_size + count);
|
||||
VPS::invalidate();
|
||||
if (count > 0) {
|
||||
memcpy(data + m_size, begin, sizeof(T) * count);
|
||||
std::copy(begin, begin + count, data + m_size);
|
||||
}
|
||||
m_size += count;
|
||||
}
|
||||
|
@ -957,15 +964,15 @@ public:
|
|||
if (size > m_capacity) reallocate(p, size);
|
||||
}
|
||||
|
||||
// expectedSize() for non-Ref types, identified by memcpy_able
|
||||
// expectedSize() for non-Ref types, identified by !flow_ref
|
||||
template <class T2 = T>
|
||||
typename std::enable_if<memcpy_able<T2>::value, size_t>::type expectedSize() const {
|
||||
typename std::enable_if<!flow_ref<T2>::value, size_t>::type expectedSize() const {
|
||||
return sizeof(T) * m_size;
|
||||
}
|
||||
|
||||
// expectedSize() for Ref types, which must in turn have expectedSize() implemented.
|
||||
template <class T2 = T>
|
||||
typename std::enable_if<!memcpy_able<T2>::value, size_t>::type expectedSize() const {
|
||||
typename std::enable_if<flow_ref<T2>::value, size_t>::type expectedSize() const {
|
||||
size_t t = sizeof(T) * m_size;
|
||||
for (int i = 0; i < m_size; i++) t += data[i].expectedSize();
|
||||
return t;
|
||||
|
@ -982,9 +989,9 @@ private:
|
|||
void reallocate(Arena& p, int requiredCapacity) {
|
||||
requiredCapacity = std::max(m_capacity * 2, requiredCapacity);
|
||||
// SOMEDAY: Maybe we are right at the end of the arena and can expand cheaply
|
||||
T* newData = (T*)new (p) uint8_t[requiredCapacity * sizeof(T)];
|
||||
T* newData = new (p) T[requiredCapacity];
|
||||
if (m_size > 0) {
|
||||
memcpy(newData, data, m_size * sizeof(T));
|
||||
std::move(data, data + m_size, newData);
|
||||
}
|
||||
data = newData;
|
||||
m_capacity = requiredCapacity;
|
||||
|
|
|
@ -28,6 +28,7 @@ set(FLOW_SRCS
|
|||
IRandom.h
|
||||
IThreadPool.cpp
|
||||
IThreadPool.h
|
||||
ITrace.h
|
||||
IndexedSet.actor.h
|
||||
IndexedSet.cpp
|
||||
IndexedSet.h
|
||||
|
@ -61,13 +62,15 @@ set(FLOW_SRCS
|
|||
ThreadSafeQueue.h
|
||||
Trace.cpp
|
||||
Trace.h
|
||||
Tracing.h
|
||||
Tracing.cpp
|
||||
TreeBenchmark.h
|
||||
UnitTest.cpp
|
||||
UnitTest.h
|
||||
XmlTraceLogFormatter.cpp
|
||||
XmlTraceLogFormatter.h
|
||||
actorcompiler.h
|
||||
crc32c.h
|
||||
crc32c.h
|
||||
crc32c.cpp
|
||||
error_definitions.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h
|
||||
|
@ -79,12 +82,19 @@ set(FLOW_SRCS
|
|||
genericactors.actor.h
|
||||
network.cpp
|
||||
network.h
|
||||
rte_memcpy.h
|
||||
serialize.cpp
|
||||
serialize.h
|
||||
stacktrace.amalgamation.cpp
|
||||
stacktrace.h
|
||||
test_memcpy.cpp
|
||||
test_memcpy_perf.cpp
|
||||
version.cpp)
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
list(APPEND FLOW_SRCS folly_memcpy.S)
|
||||
endif()
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/SourceVersion.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h)
|
||||
|
||||
add_flow_target(STATIC_LIBRARY NAME flow SRCS ${FLOW_SRCS})
|
||||
|
|
38
flow/Deque.h
38
flow/Deque.h
|
@ -41,21 +41,27 @@ public:
|
|||
Deque() : arr(0), begin(0), end(0), mask(-1) {}
|
||||
|
||||
// TODO: iterator construction, other constructors
|
||||
Deque(Deque const& r) : arr(0), begin(0), end(r.size()), mask(r.mask) {
|
||||
Deque(Deque const& r) : arr(nullptr), begin(0), end(r.size()), mask(r.mask) {
|
||||
if (r.capacity() > 0) {
|
||||
arr = (T*)aligned_alloc(std::max(__alignof(T), sizeof(void*)), capacity() * sizeof(T));
|
||||
ASSERT(arr != nullptr);
|
||||
}
|
||||
ASSERT(capacity() >= end || end == 0);
|
||||
for (uint32_t i=0; i<end; i++)
|
||||
new (&arr[i]) T(r[i]);
|
||||
// FIXME: Specialization for POD types using memcpy?
|
||||
if (r.end < r.capacity()) {
|
||||
std::copy(r.arr + r.begin, r.arr + r.begin + r.size(), arr);
|
||||
} else {
|
||||
// r.begin is always < capacity(), and r.end is always >= r.begin. Mask is used for wrapping r.end.
|
||||
// but if r.end >= r.capacity(), the deque wraps around so the
|
||||
// copy must be performed in two parts
|
||||
auto partTwo = std::copy(r.arr + r.begin, r.arr + r.capacity(), arr);
|
||||
std::copy(r.arr, r.arr + (r.end & r.mask), partTwo);
|
||||
}
|
||||
}
|
||||
|
||||
void operator=(Deque const& r) {
|
||||
cleanup();
|
||||
|
||||
arr = 0;
|
||||
arr = nullptr;
|
||||
begin = 0;
|
||||
end = r.size();
|
||||
mask = r.mask;
|
||||
|
@ -64,26 +70,32 @@ public:
|
|||
ASSERT(arr != nullptr);
|
||||
}
|
||||
ASSERT(capacity() >= end || end == 0);
|
||||
for (uint32_t i=0; i<end; i++)
|
||||
new (&arr[i]) T(r[i]);
|
||||
// FIXME: Specialization for POD types using memcpy?
|
||||
if (r.end < r.capacity()) {
|
||||
std::copy(r.arr + r.begin, r.arr + r.begin + r.size(), arr);
|
||||
} else {
|
||||
// r.begin is always < capacity(), and r.end is always >= r.begin. Mask is used for wrapping r.end.
|
||||
// but if r.end >= r.capacity(), the deque wraps around so the
|
||||
// copy must be performed in two parts
|
||||
auto partTwo = std::copy(r.arr + r.begin, r.arr + r.capacity(), arr);
|
||||
std::copy(r.arr, r.arr + (r.end & r.mask), partTwo);
|
||||
}
|
||||
}
|
||||
|
||||
Deque(Deque&& r) BOOST_NOEXCEPT : begin(r.begin), end(r.end), mask(r.mask), arr(r.arr) {
|
||||
r.arr = 0;
|
||||
Deque(Deque&& r) noexcept : begin(r.begin), end(r.end), mask(r.mask), arr(r.arr) {
|
||||
r.arr = nullptr;
|
||||
r.begin = r.end = 0;
|
||||
r.mask = -1;
|
||||
}
|
||||
|
||||
void operator=(Deque&& r) BOOST_NOEXCEPT {
|
||||
void operator=(Deque&& r) noexcept {
|
||||
cleanup();
|
||||
|
||||
begin = r.begin;
|
||||
end = r.end;
|
||||
mask = r.mask;
|
||||
arr = r.arr;
|
||||
|
||||
r.arr = 0;
|
||||
|
||||
r.arr = nullptr;
|
||||
r.begin = r.end = 0;
|
||||
r.mask = -1;
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ public:
|
|||
static Reference<P> addRef( P* ptr ) { ptr->addref(); return Reference(ptr); }
|
||||
|
||||
Reference(const Reference& r) : ptr(r.getPtr()) { if (ptr) addref(ptr); }
|
||||
Reference(Reference && r) BOOST_NOEXCEPT : ptr(r.getPtr()) { r.ptr = NULL; }
|
||||
Reference(Reference&& r) noexcept : ptr(r.getPtr()) { r.ptr = NULL; }
|
||||
|
||||
template <class Q>
|
||||
Reference(const Reference<Q>& r) : ptr(r.getPtr()) { if (ptr) addref(ptr); }
|
||||
|
@ -122,7 +122,7 @@ public:
|
|||
}
|
||||
return *this;
|
||||
}
|
||||
Reference& operator=(Reference&& r) BOOST_NOEXCEPT {
|
||||
Reference& operator=(Reference&& r) noexcept {
|
||||
P* oldPtr = ptr;
|
||||
P* newPtr = r.ptr;
|
||||
if (oldPtr != newPtr) {
|
||||
|
|
|
@ -48,6 +48,36 @@
|
|||
#include <fcntl.h>
|
||||
#include <cmath>
|
||||
|
||||
struct IssuesListImpl {
|
||||
IssuesListImpl(){}
|
||||
void addIssue(std::string issue) {
|
||||
MutexHolder h(mutex);
|
||||
issues.insert(issue);
|
||||
}
|
||||
|
||||
void retrieveIssues(std::set<std::string>& out) {
|
||||
MutexHolder h(mutex);
|
||||
for (auto const& i : issues) {
|
||||
out.insert(i);
|
||||
}
|
||||
}
|
||||
|
||||
void resolveIssue(std::string issue) {
|
||||
MutexHolder h(mutex);
|
||||
issues.erase(issue);
|
||||
}
|
||||
|
||||
private:
|
||||
Mutex mutex;
|
||||
std::set<std::string> issues;
|
||||
};
|
||||
|
||||
IssuesList::IssuesList() : impl(new IssuesListImpl{}) {}
|
||||
IssuesList::~IssuesList() { delete impl; }
|
||||
void IssuesList::addIssue(std::string issue) { impl->addIssue(issue); }
|
||||
void IssuesList::retrieveIssues(std::set<std::string> &out) { impl->retrieveIssues(out); }
|
||||
void IssuesList::resolveIssue(std::string issue) { impl->resolveIssue(issue); }
|
||||
|
||||
FileTraceLogWriter::FileTraceLogWriter(std::string directory, std::string processName, std::string basename,
|
||||
std::string extension, uint64_t maxLogsSize, std::function<void()> onError,
|
||||
Reference<ITraceLogIssuesReporter> issues)
|
||||
|
@ -72,8 +102,16 @@ void FileTraceLogWriter::lastError(int err) {
|
|||
}
|
||||
|
||||
void FileTraceLogWriter::write(const std::string& str) {
|
||||
auto ptr = str.c_str();
|
||||
int remaining = str.size();
|
||||
write(str.data(), str.size());
|
||||
}
|
||||
|
||||
void FileTraceLogWriter::write(const StringRef& str) {
|
||||
write(reinterpret_cast<const char*>(str.begin()), str.size());
|
||||
}
|
||||
|
||||
void FileTraceLogWriter::write(const char* str, size_t len) {
|
||||
auto ptr = str;
|
||||
int remaining = len;
|
||||
bool needsResolve = false;
|
||||
|
||||
while ( remaining ) {
|
||||
|
|
|
@ -23,11 +23,29 @@
|
|||
#define FLOW_FILE_TRACE_LOG_WRITER_H
|
||||
#pragma once
|
||||
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/FastRef.h"
|
||||
#include "flow/Trace.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
struct IssuesListImpl;
|
||||
struct IssuesList : ITraceLogIssuesReporter, ThreadSafeReferenceCounted<IssuesList> {
|
||||
IssuesList();
|
||||
virtual ~IssuesList();
|
||||
void addIssue(std::string issue) override;
|
||||
|
||||
void retrieveIssues(std::set<std::string>& out) override;
|
||||
|
||||
void resolveIssue(std::string issue) override;
|
||||
|
||||
void addref() { ThreadSafeReferenceCounted<IssuesList>::addref(); }
|
||||
void delref() { ThreadSafeReferenceCounted<IssuesList>::delref(); }
|
||||
|
||||
private:
|
||||
IssuesListImpl* impl;
|
||||
};
|
||||
|
||||
class FileTraceLogWriter : public ITraceLogWriter, ReferenceCounted<FileTraceLogWriter> {
|
||||
private:
|
||||
std::string directory;
|
||||
|
@ -42,6 +60,8 @@ private:
|
|||
|
||||
std::function<void()> onError;
|
||||
|
||||
void write(const char* str, size_t size);
|
||||
|
||||
public:
|
||||
FileTraceLogWriter(std::string directory, std::string processName, std::string basename, std::string extension,
|
||||
uint64_t maxLogsSize, std::function<void()> onError, Reference<ITraceLogIssuesReporter> issues);
|
||||
|
@ -51,11 +71,12 @@ public:
|
|||
|
||||
void lastError(int err);
|
||||
|
||||
void write(const std::string& str);
|
||||
void open();
|
||||
void close();
|
||||
void roll();
|
||||
void sync();
|
||||
void write(const std::string& str) override;
|
||||
void write(StringRef const& str) override;
|
||||
void open() override;
|
||||
void close() override;
|
||||
void roll() override;
|
||||
void sync() override;
|
||||
|
||||
void cleanupTraceFiles();
|
||||
};
|
||||
|
|
|
@ -69,23 +69,33 @@ bool operator<(CompatibleWithKey const& l, KeyValueMapPair const& r) {
|
|||
|
||||
class IKeyValueContainer {
|
||||
public:
|
||||
typedef typename IndexedSet<KeyValueMapPair, uint64_t>::iterator iterator;
|
||||
using const_iterator = IndexedSet<KeyValueMapPair, uint64_t>::const_iterator;
|
||||
using iterator = IndexedSet<KeyValueMapPair, uint64_t>::iterator;
|
||||
|
||||
IKeyValueContainer() = default;
|
||||
~IKeyValueContainer() = default;
|
||||
|
||||
bool empty() { return data.empty(); }
|
||||
bool empty() const { return data.empty(); }
|
||||
void clear() { return data.clear(); }
|
||||
|
||||
std::tuple<size_t, size_t, size_t> size() { return std::make_tuple(0, 0, 0); }
|
||||
std::tuple<size_t, size_t, size_t> size() const { return std::make_tuple(0, 0, 0); }
|
||||
|
||||
const_iterator find(const StringRef& key) const { return data.find(key); }
|
||||
iterator find(const StringRef& key) { return data.find(key); }
|
||||
const_iterator begin() const { return data.begin(); }
|
||||
iterator begin() { return data.begin(); }
|
||||
const_iterator cbegin() const { return begin(); }
|
||||
const_iterator end() const { return data.end(); }
|
||||
iterator end() { return data.end(); }
|
||||
const_iterator cend() const { return end(); }
|
||||
|
||||
const_iterator lower_bound(const StringRef& key) const { return data.lower_bound(key); }
|
||||
iterator lower_bound(const StringRef& key) { return data.lower_bound(key); }
|
||||
const_iterator upper_bound(const StringRef& key) const { return data.upper_bound(key); }
|
||||
iterator upper_bound(const StringRef& key) { return data.upper_bound(key); }
|
||||
iterator previous(iterator i) const { return data.previous(i); }
|
||||
const_iterator previous(const_iterator i) const { return data.previous(i); }
|
||||
const_iterator previous(iterator i) const { return data.previous(const_iterator{ i }); }
|
||||
iterator previous(iterator i) { return data.previous(i); }
|
||||
|
||||
void erase(iterator begin, iterator end) { data.erase(begin, end); }
|
||||
iterator insert(const StringRef& key, const StringRef& val, bool replaceExisting = true) {
|
||||
|
@ -96,7 +106,8 @@ public:
|
|||
return data.insert(pairs, replaceExisting);
|
||||
}
|
||||
|
||||
uint64_t sumTo(iterator to) { return data.sumTo(to); }
|
||||
uint64_t sumTo(const_iterator to) const { return data.sumTo(to); }
|
||||
uint64_t sumTo(iterator to) const { return data.sumTo(const_iterator{ to }); }
|
||||
|
||||
static int getElementBytes() { return IndexedSet<KeyValueMapPair, uint64_t>::getElementBytes(); }
|
||||
|
||||
|
|
|
@ -109,5 +109,41 @@ private:
|
|||
|
||||
Reference<IThreadPool> createGenericThreadPool();
|
||||
|
||||
class DummyThreadPool : public IThreadPool, ReferenceCounted<DummyThreadPool> {
|
||||
public:
|
||||
~DummyThreadPool() {}
|
||||
DummyThreadPool() : thread(NULL) {}
|
||||
Future<Void> getError() {
|
||||
return errors.getFuture();
|
||||
}
|
||||
void addThread( IThreadPoolReceiver* userData ) {
|
||||
ASSERT( !thread );
|
||||
thread = userData;
|
||||
}
|
||||
void post( PThreadAction action ) {
|
||||
try {
|
||||
(*action)( thread );
|
||||
} catch (Error& e) {
|
||||
errors.sendError( e );
|
||||
} catch (...) {
|
||||
errors.sendError( unknown_error() );
|
||||
}
|
||||
}
|
||||
Future<Void> stop(Error const& e) {
|
||||
return Void();
|
||||
}
|
||||
void addref() {
|
||||
ReferenceCounted<DummyThreadPool>::addref();
|
||||
}
|
||||
void delref() {
|
||||
ReferenceCounted<DummyThreadPool>::delref();
|
||||
}
|
||||
|
||||
private:
|
||||
IThreadPoolReceiver* thread;
|
||||
Promise<Void> errors;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* ITrace.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
|
||||
class StringRef;
|
||||
|
||||
struct ITraceLogWriter {
|
||||
virtual void open() = 0;
|
||||
virtual void roll() = 0;
|
||||
virtual void close() = 0;
|
||||
virtual void write(const std::string&) = 0;
|
||||
virtual void write(const StringRef&) = 0;
|
||||
virtual void sync() = 0;
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
};
|
||||
|
||||
class TraceEventFields;
|
||||
|
||||
struct ITraceLogFormatter {
|
||||
virtual const char* getExtension() = 0;
|
||||
virtual const char* getHeader() = 0; // Called when starting a new file
|
||||
virtual const char* getFooter() = 0; // Called when ending a file
|
||||
virtual std::string formatEvent(const TraceEventFields&) = 0; // Called for each event
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
};
|
||||
|
||||
struct ITraceLogIssuesReporter {
|
||||
virtual ~ITraceLogIssuesReporter();
|
||||
virtual void addIssue(std::string issue) = 0;
|
||||
virtual void resolveIssue(std::string issue) = 0;
|
||||
|
||||
virtual void retrieveIssues(std::set<std::string>& out) = 0;
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
};
|
|
@ -31,6 +31,7 @@
|
|||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <random>
|
||||
#include <type_traits>
|
||||
#include "flow/TreeBenchmark.h"
|
||||
#include "flow/UnitTest.h"
|
||||
template <class Node>
|
||||
|
@ -204,18 +205,25 @@ TEST_CASE("/flow/IndexedSet/strings") {
|
|||
template <typename K>
|
||||
struct IndexedSetHarness {
|
||||
using map = IndexedSet<K, int>;
|
||||
using const_result = typename map::const_iterator;
|
||||
using result = typename map::iterator;
|
||||
using key_type = K;
|
||||
|
||||
map s;
|
||||
|
||||
void insert(K const& k) { s.insert(K(k), 1); }
|
||||
result find(K const& k) const { return s.find(k); }
|
||||
result not_found() const { return s.end(); }
|
||||
result begin() const { return s.begin(); }
|
||||
result end() const { return s.end(); }
|
||||
result lower_bound(K const& k) const { return s.lower_bound(k); }
|
||||
result upper_bound(K const& k) const { return s.upper_bound(k); }
|
||||
const_result find(K const& k) const { return s.find(k); }
|
||||
result find(K const& k) { return s.find(k); }
|
||||
const_result not_found() const { return s.end(); }
|
||||
result not_found() { return s.end(); }
|
||||
const_result begin() const { return s.begin(); }
|
||||
result begin() { return s.begin(); }
|
||||
const_result end() const { return s.end(); }
|
||||
result end() { return s.end(); }
|
||||
const_result lower_bound(K const& k) const { return s.lower_bound(k); }
|
||||
result lower_bound(K const& k) { return s.lower_bound(k); }
|
||||
const_result upper_bound(K const& k) const { return s.upper_bound(k); }
|
||||
result upper_bound(K const& k) { return s.upper_bound(k); }
|
||||
void erase(K const& k) { s.erase(k); }
|
||||
};
|
||||
|
||||
|
@ -494,4 +502,60 @@ TEST_CASE("/flow/IndexedSet/all numbers") {
|
|||
return Void();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static constexpr bool is_const_ref_v = std::is_const_v<typename std::remove_reference_t<T>>;
|
||||
|
||||
TEST_CASE("/flow/IndexedSet/const_iterator") {
|
||||
struct Key {
|
||||
int key;
|
||||
explicit Key(int key) : key(key) {}
|
||||
};
|
||||
struct Metric {
|
||||
int metric;
|
||||
explicit Metric(int metric) : metric(metric) {}
|
||||
};
|
||||
|
||||
IndexedSet<int, int64_t> is;
|
||||
for (int i = 0; i < 10; ++i) is.insert(i, 1);
|
||||
|
||||
IndexedSet<int, int64_t>& ncis = is;
|
||||
static_assert(!is_const_ref_v<decltype(ncis)>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.begin())>);
|
||||
static_assert(is_const_ref_v<decltype(*ncis.cbegin())>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.previous(ncis.end()))>);
|
||||
static_assert(is_const_ref_v<decltype(*ncis.previous(ncis.cend()))>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.index(Metric{ 5 }))>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.find(Key{ 5 }))>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.upper_bound(Key{ 5 }))>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.lower_bound(Key{ 5 }))>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.lastLessOrEqual(Key{ 5 }))>);
|
||||
static_assert(!is_const_ref_v<decltype(*ncis.lastItem())>);
|
||||
|
||||
const IndexedSet<int, int64_t>& cis = is;
|
||||
static_assert(is_const_ref_v<decltype(cis)>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.begin())>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.cbegin())>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.previous(cis.end()))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.previous(cis.cend()))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.previous(ncis.end()))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.previous(ncis.cend()))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.index(Metric{ 5 }))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.find(Key{ 5 }))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.upper_bound(Key{ 5 }))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.lower_bound(Key{ 5 }))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.lastLessOrEqual(Key{ 5 }))>);
|
||||
static_assert(is_const_ref_v<decltype(*cis.lastItem())>);
|
||||
|
||||
for (auto& val : ncis) {
|
||||
static_assert(!is_const_ref_v<decltype(val)>);
|
||||
}
|
||||
for (const auto& val : ncis) {
|
||||
static_assert(is_const_ref_v<decltype(val)>);
|
||||
}
|
||||
for (auto& val : cis) {
|
||||
static_assert(is_const_ref_v<decltype(val)>);
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
void forceLinkIndexedSetTests() {}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue