Merge remote-tracking branch 'upstream/master' into add-lock-unlock-to-special-keys

This commit is contained in:
Chaoguang Lin 2020-10-08 14:23:16 -07:00
commit c66a775d64
59 changed files with 1634 additions and 668 deletions

View File

@ -153,7 +153,7 @@ void fdb_future_destroy( FDBFuture* f ) {
extern "C" DLLEXPORT
fdb_error_t fdb_future_block_until_ready( FDBFuture* f ) {
CATCH_AND_RETURN( TSAVB(f)->blockUntilReady(); );
CATCH_AND_RETURN(TSAVB(f)->blockUntilReadyCheckOnMainThread(););
}
fdb_bool_t fdb_future_is_error_v22( FDBFuture* f ) {

View File

@ -78,8 +78,9 @@ type Subspace interface {
// FoundationDB keys (corresponding to the prefix of this Subspace).
fdb.KeyConvertible
// All Subspaces implement fdb.ExactRange and fdb.Range, and describe all
// keys logically in this Subspace.
// All Subspaces implement fdb.ExactRange and fdb.Range, and describe all
// keys strictly within the subspace that encode tuples. Specifically,
// this will include all keys in [prefix + '\x00', prefix + '\xff').
fdb.ExactRange
}

View File

@ -1,3 +1,6 @@
set(RUN_JAVA_TESTS ON CACHE BOOL "Run Java unit tests")
set(RUN_JUNIT_TESTS OFF CACHE BOOL "Compile and run junit tests")
set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/async/AsyncIterable.java
src/main/com/apple/foundationdb/async/AsyncIterator.java
@ -102,6 +105,10 @@ set(JAVA_TESTS_SRCS
src/test/com/apple/foundationdb/test/WatchTest.java
src/test/com/apple/foundationdb/test/WhileTrueTest.java)
set(JAVA_JUNIT_TESTS
src/junit/com/apple/foundationdb/tuple/AllTests.java
src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java)
set(GENERATED_JAVA_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/main/com/apple/foundationdb)
file(MAKE_DIRECTORY ${GENERATED_JAVA_DIR})
@ -173,12 +180,6 @@ add_jar(fdb-java ${JAVA_BINDING_SRCS} ${GENERATED_JAVA_FILES} ${CMAKE_SOURCE_DIR
OUTPUT_DIR ${PROJECT_BINARY_DIR}/lib VERSION ${CMAKE_PROJECT_VERSION} MANIFEST ${MANIFEST_FILE})
add_dependencies(fdb-java fdb_java_options fdb_java)
# TODO[mpilman]: The java RPM will require some more effort (mostly on debian). However,
# most people will use the fat-jar, so it is not clear how high this priority is.
#install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java)
#install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java)
if(NOT OPEN_FOR_IDE)
set(FAT_JAR_BINARIES "NOTFOUND" CACHE STRING
"Path of a directory structure with libraries to include in fat jar (a lib directory)")
@ -252,4 +253,30 @@ if(NOT OPEN_FOR_IDE)
add_dependencies(fat-jar fdb-java)
add_dependencies(fat-jar copy_lib)
add_dependencies(packages fat-jar)
if(RUN_JAVA_TESTS)
set(enabled ENABLED)
else()
set(enabled DISABLED)
endif()
set(TEST_CP ${tests_jar} ${target_jar})
message(STATUS "TEST_CP ${TEST_CP}")
add_java_test(NAME DirectoryTest CLASS_PATH ${TEST_CP}
CLASS com.apple.foundationdb.test.DirectoryTest ${enabled})
if(RUN_JUNIT_TESTS)
file(DOWNLOAD "https://search.maven.org/remotecontent?filepath=junit/junit/4.13/junit-4.13.jar"
${CMAKE_BINARY_DIR}/packages/junit-4.13.jar
EXPECTED_HASH SHA256=4b8532f63bdc0e0661507f947eb324a954d1dbac631ad19c8aa9a00feed1d863)
file(DOWNLOAD "https://repo1.maven.org/maven2/org/hamcrest/hamcrest-all/1.3/hamcrest-all-1.3.jar"
${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar
EXPECTED_HASH SHA256=4877670629ab96f34f5f90ab283125fcd9acb7e683e66319a68be6eb2cca60de)
add_jar(fdb-junit SOURCES ${JAVA_JUNIT_TESTS} INCLUDE_JARS fdb-java ${CMAKE_BINARY_DIR}/packages/junit-4.13.jar)
get_property(junit_jar_path TARGET fdb-junit PROPERTY JAR_FILE)
add_test(NAME junit
COMMAND ${Java_JAVA_EXECUTABLE}
-cp "${target_jar}:${junit_jar_path}:${CMAKE_BINARY_DIR}/packages/junit-4.13.jar:${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar"
-Djava.library.path=${CMAKE_BINARY_DIR}/lib
org.junit.runner.JUnitCore "com.apple.foundationdb.tuple.AllTests")
endif()
endif()

View File

@ -27,9 +27,14 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.Ignore;
/**
* @author Ben
@ -251,7 +256,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#bisectLeft(java.math.BigInteger[], java.math.BigInteger)}.
*/
@Test
@Test @Ignore
public void testBisectLeft() {
fail("Not yet implemented");
}
@ -259,7 +264,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#compareUnsigned(byte[], byte[])}.
*/
@Test
@Test @Ignore
public void testCompare() {
fail("Not yet implemented");
}
@ -267,7 +272,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#findNext(byte[], byte, int)}.
*/
@Test
@Test @Ignore
public void testFindNext() {
fail("Not yet implemented");
}
@ -275,7 +280,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#findTerminator(byte[], byte, byte, int)}.
*/
@Test
@Test @Ignore
public void testFindTerminator() {
fail("Not yet implemented");
}
@ -283,7 +288,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#copyOfRange(byte[], int, int)}.
*/
@Test
@Test @Ignore
public void testCopyOfRange() {
fail("Not yet implemented");
}
@ -291,7 +296,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#strinc(byte[])}.
*/
@Test
@Test @Ignore
public void testStrinc() {
fail("Not yet implemented");
}
@ -299,7 +304,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#printable(byte[])}.
*/
@Test
@Test @Ignore
public void testPrintable() {
fail("Not yet implemented");
}

View File

@ -34,7 +34,7 @@ public class DirectoryTest {
public static void main(String[] args) throws Exception {
try {
FDB fdb = FDB.selectAPIVersion(700);
try(Database db = fdb.open()) {
try(Database db = args.length > 0 ? fdb.open(args[0]) : fdb.open()) {
runTests(db);
}
}

View File

@ -45,13 +45,13 @@ RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -
cd .. && rm -rf ninja-1.9.0 ninja.zip
# install openssl
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\
echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2 openssl.tar.gz" > openssl-sha.txt &&\
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1h.tar.gz -o openssl.tar.gz &&\
echo "5c9ca8774bd7b03e5784f26ae9e9e6d749c9da2438545077e6b3d755a06595d9 openssl.tar.gz" > openssl-sha.txt &&\
sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\
cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
cd openssl-1.1.1h && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\
ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\
cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz
cd /tmp/ && rm -rf /tmp/openssl-1.1.1h /tmp/openssl.tar.gz
RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocksdb.tar.gz &&\
echo "d573d2f15cdda883714f7e0bc87b814a8d4a53a82edde558f08f940e905541ee rocksdb.tar.gz" > rocksdb-sha.txt &&\
@ -61,8 +61,8 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
ARG TIMEZONEINFO=America/Los_Angeles
RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
LABEL version=0.1.15
ENV DOCKER_IMAGEVER=0.1.15
LABEL version=0.1.17
ENV DOCKER_IMAGEVER=0.1.17
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++

View File

@ -1,4 +1,4 @@
FROM foundationdb/foundationdb-build:0.1.15
FROM foundationdb/foundationdb-build:0.1.17
USER root

View File

@ -2,7 +2,7 @@ version: "3"
services:
common: &common
image: foundationdb/foundationdb-build:0.1.15
image: foundationdb/foundationdb-build:0.1.17
build-setup: &build-setup
<<: *common

View File

@ -363,3 +363,60 @@ function(package_bindingtester)
add_custom_target(bindingtester ALL DEPENDS ${tar_file})
add_dependencies(bindingtester copy_bindingtester_binaries)
endfunction()
function(add_fdbclient_test)
set(options DISABLED ENABLED)
set(oneValueArgs NAME)
set(multiValueArgs COMMAND)
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
if(NOT T_ENABLED AND T_DISABLED)
return()
endif()
if(NOT T_NAME)
message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test")
endif()
if(NOT T_COMMAND)
message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test")
endif()
message(STATUS "Adding Client test ${T_NAME}")
add_test(NAME "${T_NAME}"
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
--build-dir ${CMAKE_BINARY_DIR}
--
${T_COMMAND})
endfunction()
function(add_java_test)
set(options DISABLED ENABLED)
set(oneValueArgs NAME CLASS)
set(multiValueArgs CLASS_PATH)
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
if(NOT T_ENABLED AND T_DISABLED)
return()
endif()
if(NOT T_NAME)
message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test")
endif()
if(NOT T_CLASS)
message(FATAL_ERROR "CLASS is a required argument for add_fdbclient_test")
endif()
set(cp "")
set(separator ":")
if (WIN32)
set(separator ";")
endif()
message(STATUS "CLASSPATH ${T_CLASS_PATH}")
foreach(path ${T_CLASS_PATH})
if(cp)
set(cp "${cp}${separator}${path}")
else()
set(cp "${path}")
endif()
endforeach()
add_fdbclient_test(
NAME ${T_NAME}
COMMAND ${Java_JAVA_EXECUTABLE}
-cp "${cp}"
-Djava.library.path=${CMAKE_BINARY_DIR}/lib
${T_CLASS} "@CLUSTER_FILE@")
endfunction()

View File

@ -7,7 +7,7 @@ SCRIPTID="${$}"
SAVEONERROR="${SAVEONERROR:-1}"
PYTHONDIR="${BINDIR}/tests/python"
testScript="${BINDIR}/tests/bindingtester/run_binding_tester.sh"
VERSION="1.8"
VERSION="1.9"
source ${SCRIPTDIR}/localClusterStart.sh
@ -28,7 +28,7 @@ then
echo "Log dir: ${LOGDIR}"
echo "Python path: ${PYTHONDIR}"
echo "Lib dir: ${LIBDIR}"
echo "Cluster String: ${CLUSTERSTRING}"
echo "Cluster String: ${FDBCLUSTERTEXT}"
echo "Script Id: ${SCRIPTID}"
echo "Version: ${VERSION}"
fi

View File

@ -6,6 +6,7 @@ LOGDIR="${WORKDIR}/log"
ETCDIR="${WORKDIR}/etc"
BINDIR="${BINDIR:-${SCRIPTDIR}}"
FDBPORTSTART="${FDBPORTSTART:-4000}"
FDBPORTTOTAL="${FDBPORTTOTAL:-1000}"
SERVERCHECKS="${SERVERCHECKS:-10}"
CONFIGUREWAIT="${CONFIGUREWAIT:-240}"
FDBCONF="${ETCDIR}/fdb.cluster"
@ -18,382 +19,384 @@ status=0
messagetime=0
messagecount=0
# Define a random ip address and port on localhost
if [ -z ${IPADDRESS} ]; then
let index2="${RANDOM} % 256"
let index3="${RANDOM} % 256"
let index4="(${RANDOM} % 255) + 1"
IPADDRESS="127.${index2}.${index3}.${index4}"
# Do nothing, if cluster string is already defined
if [ -n "${FDBCLUSTERTEXT}" ]
then
:
# Otherwise, define the cluster text
else
# Define a random ip address and port on localhost
if [ -z "${IPADDRESS}" ]; then
let index2="${RANDOM} % 256"
let index3="${RANDOM} % 256"
let index4="(${RANDOM} % 255) + 1"
IPADDRESS="127.${index2}.${index3}.${index4}"
fi
if [ -z "${FDBPORT}" ]; then
let FDBPORT="(${RANDOM} % ${FDBPORTTOTAL}) + ${FDBPORTSTART}"
fi
FDBCLUSTERTEXT="${IPADDRESS}:${FDBPORT}"
fi
if [ -z ${FDBPORT} ]; then
let FDBPORT="(${RANDOM} % 1000) + ${FDBPORTSTART}"
fi
CLUSTERSTRING="${IPADDRESS}:${FDBPORT}"
function log
{
local status=0
if [ "$#" -lt 1 ]
then
echo "Usage: log <message> [echo]"
echo
echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
echo "second argument is either not present or is set to 1, stdout."
let status="${status} + 1"
else
# Log to stdout.
if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
then
echo "${1}"
fi
local status=0
if [ "$#" -lt 1 ]
then
echo "Usage: log <message> [echo]"
echo
echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
echo "second argument is either not present or is set to 1, stdout."
let status="${status} + 1"
else
# Log to stdout.
if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
then
echo "${1}"
fi
# Log to file.
datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
dir=$(dirname "${LOGFILE}")
if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
then
echo "Could not create directory to log output."
let status="${status} + 1"
elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
then
echo "Could not create file ${LOGFILE} to log output."
let status="${status} + 1"
elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
then
echo "Could not log output to ${LOGFILE}."
let status="${status} + 1"
fi
fi
# Log to file.
datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
dir=$(dirname "${LOGFILE}")
if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
then
echo "Could not create directory to log output."
let status="${status} + 1"
elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
then
echo "Could not create file ${LOGFILE} to log output."
let status="${status} + 1"
elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
then
echo "Could not log output to ${LOGFILE}."
let status="${status} + 1"
fi
fi
return "${status}"
return "${status}"
}
# Display a message for the user.
function displayMessage
{
local status=0
local status=0
if [ "$#" -lt 1 ]
then
echo "displayMessage <message>"
let status="${status} + 1"
elif ! log "${1}" 0
then
log "Could not write message to file."
else
# Increment the message counter
let messagecount="${messagecount} + 1"
if [ "$#" -lt 1 ]
then
echo "displayMessage <message>"
let status="${status} + 1"
elif ! log "${1}" 0
then
log "Could not write message to file."
else
# Increment the message counter
let messagecount="${messagecount} + 1"
# Display successful message, if previous message
if [ "${messagecount}" -gt 1 ]
then
# Determine the amount of transpired time
let timespent="${SECONDS}-${messagetime}"
# Display successful message, if previous message
if [ "${messagecount}" -gt 1 ]
then
# Determine the amount of transpired time
let timespent="${SECONDS}-${messagetime}"
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "... done in %3d seconds\n" "${timespent}"
fi
fi
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "... done in %3d seconds\n" "${timespent}"
fi
fi
# Display message
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1"
fi
# Display message
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1"
fi
# Update the variables
messagetime="${SECONDS}"
fi
# Update the variables
messagetime="${SECONDS}"
fi
return "${status}"
return "${status}"
}
# Create the directories used by the server.
function createDirectories
{
local status=0
local status=0
# Display user message
if ! displayMessage "Creating directories"
then
echo 'Failed to display user message'
let status="${status} + 1"
# Display user message
if ! displayMessage "Creating directories"
then
echo 'Failed to display user message'
let status="${status} + 1"
elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
then
log "Failed to create directories"
let status="${status} + 1"
elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
then
log "Failed to create directories"
let status="${status} + 1"
# Display user message
elif ! displayMessage "Setting file permissions"
then
log 'Failed to display user message'
let status="${status} + 1"
# Display user message
elif ! displayMessage "Setting file permissions"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
then
log "Failed to set file permissions"
let status="${status} + 1"
elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
then
log "Failed to set file permissions"
let status="${status} + 1"
else
while read filepath
do
if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
then
# if [ "${DEBUGLEVEL}" -gt 1 ]; then
# log " Enable executable: ${filepath}"
# fi
log " Enable executable: ${filepath}" "${DEBUGLEVEL}"
if ! chmod 755 "${filepath}"
then
log "Failed to set executable for file: ${filepath}"
let status="${status} + 1"
fi
fi
done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
fi
else
while read filepath
do
if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
then
# if [ "${DEBUGLEVEL}" -gt 1 ]; then
# log " Enable executable: ${filepath}"
# fi
log " Enable executable: ${filepath}" "${DEBUGLEVEL}"
if ! chmod 755 "${filepath}"
then
log "Failed to set executable for file: ${filepath}"
let status="${status} + 1"
fi
fi
done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
fi
return ${status}
return ${status}
}
# Create a cluster file for the local cluster.
function createClusterFile
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
# Display user message
elif ! displayMessage "Creating Fdb Cluster file"
then
log 'Failed to display user message'
let status="${status} + 1"
else
description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
echo "${description}:${random_str}@${CLUSTERSTRING}" > "${FDBCONF}"
fi
if [ "${status}" -ne 0 ]; then
:
# Display user message
elif ! displayMessage "Creating Fdb Cluster file"
then
log 'Failed to display user message'
let status="${status} + 1"
else
description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
echo "${description}:${random_str}@${FDBCLUSTERTEXT}" > "${FDBCONF}"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! chmod 0664 "${FDBCONF}"; then
log "Failed to set permissions on fdbconf: ${FDBCONF}"
let status="${status} + 1"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! chmod 0664 "${FDBCONF}"; then
log "Failed to set permissions on fdbconf: ${FDBCONF}"
let status="${status} + 1"
fi
return ${status}
return ${status}
}
# Stop the Cluster from running.
function stopCluster
{
local status=0
local status=0
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${CLUSTERSTRING}" "${FDBSERVERID}" >> "${AUDITLOG}"
fi
if [ -z "${FDBSERVERID}" ]; then
log 'FDB Server process is not defined'
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}"; then
log "Failed to locate FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${CLUSTERSTRING}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log"
then
# Ensure that process is dead
if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then
log "Killed cluster (${FDBSERVERID}) via cli"
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed"
fi
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to forcibly kill FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID})"
fi
return "${status}"
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" "${FDBSERVERID}" >> "${AUDITLOG}"
fi
if [ -z "${FDBSERVERID}" ]; then
log 'FDB Server process is not defined'
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}"; then
log "Failed to locate FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${FDBCLUSTERTEXT}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log"
then
# Ensure that process is dead
if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then
log "Killed cluster (${FDBSERVERID}) via cli"
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed"
fi
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to forcibly kill FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID})"
fi
return "${status}"
}
# Start the server running.
function startFdbServer
{
local status=0
local status=0
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${CLUSTERSTRING}" >> "${AUDITLOG}"
fi
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" >> "${AUDITLOG}"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! displayMessage "Starting Fdb Server"
then
log 'Failed to display user message'
let status="${status} + 1"
if ! displayMessage "Starting Fdb Server"
then
log 'Failed to display user message'
let status="${status} + 1"
else
"${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${CLUSTERSTRING}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" &
fdbpid=$!
fdbrc=$?
if [ $fdbrc -ne 0 ]
then
log "Failed to start FDB Server"
let status="${status} + 1"
else
FDBSERVERID="${fdbpid}"
fi
fi
else
"${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${FDBCLUSTERTEXT}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" &
if [ "${?}" -ne 0 ]
then
log "Failed to start FDB Server"
let status="${status} + 1"
else
FDBSERVERID="${!}"
fi
fi
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server start failed because no process"
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}" ; then
log "FDB Server start failed because process terminated unexpectedly"
let status="${status} + 1"
fi
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server start failed because no process"
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}" ; then
log "FDB Server start failed because process terminated unexpectedly"
let status="${status} + 1"
fi
return ${status}
return ${status}
}
function getStatus
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get status from fdbcli'
let status="${status} + 1"
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get status from fdbcli'
let status="${status} + 1"
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
fi
return ${status}
return ${status}
}
# Verify that the cluster is available.
function verifyAvailable
{
local status=0
local status=0
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server process is not defined."
let status="${status} + 1"
# Verify that the server is running.
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Display user message.
elif ! displayMessage "Checking cluster availability"
then
log 'Failed to display user message'
let status="${status} + 1"
# Determine if status json says the database is available.
else
avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
log "Avail value: ${avail}" "${DEBUGLEVEL}"
if [[ -n "${avail}" ]] ; then
:
else
let status="${status} + 1"
fi
fi
return "${status}"
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server process is not defined."
let status="${status} + 1"
# Verify that the server is running.
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Display user message.
elif ! displayMessage "Checking cluster availability"
then
log 'Failed to display user message'
let status="${status} + 1"
# Determine if status json says the database is available.
else
avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
log "Avail value: ${avail}" "${DEBUGLEVEL}"
if [[ -n "${avail}" ]] ; then
:
else
let status="${status} + 1"
fi
fi
return "${status}"
}
# Configure the database on the server.
function createDatabase
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
# Ensure that the server is running
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process: (${FDBSERVERID}) is not running"
let status="${status} + 1"
if [ "${status}" -ne 0 ]; then
:
# Ensure that the server is running
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process: (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Display user message
elif ! displayMessage "Creating database"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
then
log 'Failed to create fdbclient.log'
let status="${status} + 1"
elif ! getStatus
then
log 'Failed to get status'
let status="${status} + 1"
# Display user message
elif ! displayMessage "Creating database"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
then
log 'Failed to create fdbclient.log'
let status="${status} + 1"
elif ! getStatus
then
log 'Failed to get status'
let status="${status} + 1"
# Configure the database.
else
"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"
# Configure the database.
else
"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"
if ! displayMessage "Checking if config succeeded"
then
log 'Failed to display user message.'
fi
if ! displayMessage "Checking if config succeeded"
then
log 'Failed to display user message.'
fi
iteration=0
while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable
do
log "Database not created (iteration ${iteration})."
let iteration="${iteration} + 1"
done
iteration=0
while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable
do
log "Database not created (iteration ${iteration})."
let iteration="${iteration} + 1"
done
if ! verifyAvailable
then
log "Failed to create database via cli"
getStatus
cat "${LOGDIR}/fdbclient.log"
log "Ignoring -- moving on"
#let status="${status} + 1"
fi
fi
if ! verifyAvailable
then
log "Failed to create database via cli"
getStatus
cat "${LOGDIR}/fdbclient.log"
log "Ignoring -- moving on"
#let status="${status} + 1"
fi
fi
return ${status}
return ${status}
}
# Begin the local cluster from scratch.
function startCluster
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
elif ! createDirectories
then
log "Could not create directories."
let status="${status} + 1"
elif ! createClusterFile
then
log "Could not create cluster file."
let status="${status} + 1"
elif ! startFdbServer
then
log "Could not start FDB server."
let status="${status} + 1"
elif ! createDatabase
then
log "Could not create database."
let status="${status} + 1"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! createDirectories
then
log "Could not create directories."
let status="${status} + 1"
elif ! createClusterFile
then
log "Could not create cluster file."
let status="${status} + 1"
elif ! startFdbServer
then
log "Could not start FDB server."
let status="${status} + 1"
elif ! createDatabase
then
log "Could not create database."
let status="${status} + 1"
fi
return ${status}
return ${status}
}

View File

@ -263,9 +263,9 @@ See :ref:`developer-guide-programming-with-futures` for further (language-indepe
.. function:: fdb_error_t fdb_future_block_until_ready(FDBFuture* future)
Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. out of memory or other operating system resources).
Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. deadlock detected, out of memory or other operating system resources).
.. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock.
.. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock. In some cases the client can detect the deadlock and throw a ``blocked_from_network_thread`` error.
.. function:: fdb_bool_t fdb_future_is_ready(FDBFuture* future)

View File

@ -114,8 +114,12 @@ FoundationDB may return the following error codes from API functions. If you nee
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| transaction_read_only | 2023| Attempted to commit a transaction specified as read-only |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| invalid_cache_eviction_policy | 2024| Invalid cache eviction policy, only random and lru are supported |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| network_cannot_be_restarted | 2025| Network can only be started once |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| blocked_from_network_thread | 2026| Detected a deadlock in a callback called from the network thread |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| incompatible_protocol_version | 2100| Incompatible protocol version |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| transaction_too_large | 2101| Transaction exceeds byte limit |

View File

@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-6.3.5.pkg <https://www.foundationdb.org/downloads/6.3.5/macOS/installers/FoundationDB-6.3.5.pkg>`_
* `FoundationDB-6.3.8.pkg <https://www.foundationdb.org/downloads/6.3.8/macOS/installers/FoundationDB-6.3.8.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-6.3.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.5/ubuntu/installers/foundationdb-clients_6.3.5-1_amd64.deb>`_
* `foundationdb-server-6.3.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.5/ubuntu/installers/foundationdb-server_6.3.5-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-clients_6.3.8-1_amd64.deb>`_
* `foundationdb-server-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-server_6.3.8-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-6.3.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel6/installers/foundationdb-clients-6.3.5-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.3.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel6/installers/foundationdb-server-6.3.5-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-clients-6.3.8-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-server-6.3.8-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-6.3.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel7/installers/foundationdb-clients-6.3.5-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.3.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel7/installers/foundationdb-server-6.3.5-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-clients-6.3.8-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-server-6.3.8-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-6.3.5-x64.msi <https://www.foundationdb.org/downloads/6.3.5/windows/installers/foundationdb-6.3.5-x64.msi>`_
* `foundationdb-6.3.8-x64.msi <https://www.foundationdb.org/downloads/6.3.8/windows/installers/foundationdb-6.3.8-x64.msi>`_
API Language Bindings
=====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package:
* `foundationdb-6.3.5.tar.gz <https://www.foundationdb.org/downloads/6.3.5/bindings/python/foundationdb-6.3.5.tar.gz>`_
* `foundationdb-6.3.8.tar.gz <https://www.foundationdb.org/downloads/6.3.8/bindings/python/foundationdb-6.3.8.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-6.3.5.gem <https://www.foundationdb.org/downloads/6.3.5/bindings/ruby/fdb-6.3.5.gem>`_
* `fdb-6.3.8.gem <https://www.foundationdb.org/downloads/6.3.8/bindings/ruby/fdb-6.3.8.gem>`_
Java 8+
-------
* `fdb-java-6.3.5.jar <https://www.foundationdb.org/downloads/6.3.5/bindings/java/fdb-java-6.3.5.jar>`_
* `fdb-java-6.3.5-javadoc.jar <https://www.foundationdb.org/downloads/6.3.5/bindings/java/fdb-java-6.3.5-javadoc.jar>`_
* `fdb-java-6.3.8.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8.jar>`_
* `fdb-java-6.3.8-javadoc.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8-javadoc.jar>`_
Go 1.11+
--------

View File

@ -2,6 +2,10 @@
Release Notes
#############
6.2.26
======
* Attempt to detect when calling :func:`fdb_future_block_until_ready` would cause a deadlock, and throw ``blocked_from_network_thread`` if it would definitely cause a deadlock.
6.2.25
======

View File

@ -2,11 +2,9 @@
Release Notes
#############
6.3.5
6.3.8
=====
* Report missing old tlogs information when in recovery before storage servers are fully recovered. `(PR #3706) <https://github.com/apple/foundationdb/pull/3706>`_
Features
--------
@ -110,6 +108,10 @@ Other Changes
* Updated boost to 1.72. `(PR #2684) <https://github.com/apple/foundationdb/pull/2684>`_
* Calling ``fdb_run_network`` multiple times in a single run of a client program now returns an error instead of causing undefined behavior. [6.3.1] `(PR #3229) <https://github.com/apple/foundationdb/pull/3229>`_
* Blob backup URL parameter ``request_timeout`` changed to ``request_timeout_min``, with prior name still supported. `(PR #3533) <https://github.com/apple/foundationdb/pull/3533>`_
* Support query command in backup CLI that allows users to query restorable files by key ranges. [6.3.6] `(PR #3703) <https://github.com/apple/foundationdb/pull/3703>`_
* Report missing old tlogs information when in recovery before storage servers are fully recovered. [6.3.6] `(PR #3706) <https://github.com/apple/foundationdb/pull/3706>`_
* Updated OpenSSL to version 1.1.1h. [6.3.7] `(PR #3809) <https://github.com/apple/foundationdb/pull/3809>`_
* Lowered the amount of time a watch will remain registered on a storage server from 900 seconds to 30 seconds. [6.3.8] `(PR #3833) <https://github.com/apple/foundationdb/pull/3833>`_
Fixes from previous versions
----------------------------
@ -126,6 +128,8 @@ Fixes only impacting 6.3.0+
* Refreshing TLS certificates could cause crashes. [6.3.2] `(PR #3352) <https://github.com/apple/foundationdb/pull/3352>`_
* All storage class processes attempted to connect to the same coordinator. [6.3.2] `(PR #3361) <https://github.com/apple/foundationdb/pull/3361>`_
* Adjusted the proxy load balancing algorithm to be based on the CPU usage of the process instead of the number of requests processed. [6.3.5] `(PR #3653) <https://github.com/apple/foundationdb/pull/3653>`_
* Only return the error code ``batch_transaction_throttled`` for API versions greater than or equal to 630. [6.3.6] `(PR #3799) <https://github.com/apple/foundationdb/pull/3799>`_
* The fault tolerance calculation in status did not take into account region configurations. [6.3.8] `(PR #3836) <https://github.com/apple/foundationdb/pull/3836>`_
Earlier release notes
---------------------

View File

@ -18,6 +18,10 @@
* limitations under the License.
*/
#include "fdbclient/JsonBuilder.h"
#include "flow/Arena.h"
#include "flow/Error.h"
#include "flow/Trace.h"
#define BOOST_DATE_TIME_NO_LIB
#include <boost/interprocess/managed_shared_memory.hpp>
@ -81,7 +85,22 @@ enum enumProgramExe {
};
enum enumBackupType {
BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_MODIFY, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST, BACKUP_DUMP, BACKUP_CLEANUP
BACKUP_UNDEFINED = 0,
BACKUP_START,
BACKUP_MODIFY,
BACKUP_STATUS,
BACKUP_ABORT,
BACKUP_WAIT,
BACKUP_DISCONTINUE,
BACKUP_PAUSE,
BACKUP_RESUME,
BACKUP_EXPIRE,
BACKUP_DELETE,
BACKUP_DESCRIBE,
BACKUP_LIST,
BACKUP_QUERY,
BACKUP_DUMP,
BACKUP_CLEANUP
};
enum enumDBType {
@ -121,6 +140,7 @@ enum {
OPT_TAGNAME,
OPT_BACKUPKEYS,
OPT_WAITFORDONE,
OPT_BACKUPKEYS_FILTER,
OPT_INCREMENTALONLY,
// Backup Modify
@ -624,6 +644,40 @@ CSimpleOpt::SOption g_rgBackupListOptions[] = {
SO_END_OF_OPTIONS
};
CSimpleOpt::SOption g_rgBackupQueryOptions[] = {
#ifdef _WIN32
{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
#endif
{ OPT_RESTORE_TIMESTAMP, "--query_restore_timestamp", SO_REQ_SEP },
{ OPT_DESTCONTAINER, "-d", SO_REQ_SEP },
{ OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP },
{ OPT_RESTORE_VERSION, "-qrv", SO_REQ_SEP },
{ OPT_RESTORE_VERSION, "--query_restore_version", SO_REQ_SEP },
{ OPT_BACKUPKEYS_FILTER, "-k", SO_REQ_SEP },
{ OPT_BACKUPKEYS_FILTER, "--keys", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
{ OPT_TRACE_DIR, "--logdir", SO_REQ_SEP },
{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
{ OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP },
{ OPT_QUIET, "-q", SO_NONE },
{ OPT_QUIET, "--quiet", SO_NONE },
{ OPT_VERSION, "-v", SO_NONE },
{ OPT_VERSION, "--version", SO_NONE },
{ OPT_CRASHONERROR, "--crash", SO_NONE },
{ OPT_MEMLIMIT, "-m", SO_REQ_SEP },
{ OPT_MEMLIMIT, "--memory", SO_REQ_SEP },
{ OPT_HELP, "-?", SO_NONE },
{ OPT_HELP, "-h", SO_NONE },
{ OPT_HELP, "--help", SO_NONE },
{ OPT_DEVHELP, "--dev-help", SO_NONE },
{ OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP },
{ OPT_KNOB, "--knob_", SO_REQ_SEP },
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
#endif
SO_END_OF_OPTIONS
};
// g_rgRestoreOptions is used by fdbrestore and fastrestore_tool
CSimpleOpt::SOption g_rgRestoreOptions[] = {
#ifdef _WIN32
@ -959,13 +1013,16 @@ void printBackupContainerInfo() {
static void printBackupUsage(bool devhelp) {
printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n");
printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | list | cleanup) [OPTIONS]\n\n", exeBackup.toString().c_str());
printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | "
"list | query | cleanup) [OPTIONS]\n\n",
exeBackup.toString().c_str());
printf(" -C CONNFILE The path of a file containing the connection string for the\n"
" FoundationDB cluster. The default is first the value of the\n"
" FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n"
" then `%s'.\n", platform::getDefaultClusterFilePath().c_str());
printf(" -d, --destcontainer URL\n"
" The Backup container URL for start, modify, describe, expire, and delete operations.\n");
" The Backup container URL for start, modify, describe, query, expire, and delete "
"operations.\n");
printBackupContainerInfo();
printf(" -b, --base_url BASEURL\n"
" Base backup URL for list operations. This looks like a Backup URL but without a backup name.\n");
@ -979,6 +1036,12 @@ static void printBackupUsage(bool devhelp) {
printf(" --delete_before_days NUM_DAYS\n"
" Another way to specify version cutoff for expire operations. Deletes data files containing no data at or after a\n"
" version approximately NUM_DAYS days worth of versions prior to the latest log version in the backup.\n");
printf(" -qrv --query_restore_version VERSION\n"
" For query operations, set target version for restoring a backup. Set -1 for maximum\n"
" restorable version (default) and -2 for minimum restorable version.\n");
printf(" --query_restore_timestamp DATETIME\n"
" For query operations, instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str());
printf(" and it will be converted to a version from that time using metadata in the cluster file.\n");
printf(" --restorable_after_timestamp DATETIME\n"
" For expire operations, set minimum acceptable restorability to the version equivalent of DATETIME and later.\n");
printf(" --restorable_after_version VERSION\n"
@ -997,8 +1060,8 @@ static void printBackupUsage(bool devhelp) {
" Specifies a UID to verify against the BackupUID of the running backup. If provided, the UID is verified in the same transaction\n"
" which sets the new backup parameters (if the UID matches).\n");
printf(" -e ERRORLIMIT The maximum number of errors printed by status (default is 10).\n");
printf(" -k KEYS List of key ranges to backup.\n"
" If not specified, the entire database will be backed up.\n");
printf(" -k KEYS List of key ranges to backup or to filter the backup in query operations.\n"
" If not specified, the entire database will be backed up or no filter will be applied.\n");
printf(" --partitioned_log_experimental Starts with new type of backup system using partitioned logs.\n");
printf(" -n, --dryrun For backup start or restore start, performs a trial run with no actual changes made.\n");
printf(" --log Enables trace file logging for the CLI session.\n"
@ -1320,6 +1383,7 @@ enumBackupType getBackupType(std::string backupType)
values["delete"] = BACKUP_DELETE;
values["describe"] = BACKUP_DESCRIBE;
values["list"] = BACKUP_LIST;
values["query"] = BACKUP_QUERY;
values["dump"] = BACKUP_DUMP;
values["modify"] = BACKUP_MODIFY;
}
@ -2458,6 +2522,135 @@ ACTOR Future<Void> describeBackup(const char *name, std::string destinationConta
return Void();
}
static void reportBackupQueryError(UID operationId, JsonBuilderObject& result, std::string errorMessage) {
result["error"] = errorMessage;
printf("%s\n", result.getJson().c_str());
TraceEvent("BackupQueryFailure").detail("OperationId", operationId).detail("Reason", errorMessage);
}
// If restoreVersion is invalidVersion or latestVersion, use the maximum or minimum restorable version respectively for
// selected key ranges. If restoreTimestamp is specified, any specified restoreVersion will be overriden to the version
// resolved to that timestamp.
ACTOR Future<Void> queryBackup(const char* name, std::string destinationContainer,
Standalone<VectorRef<KeyRangeRef>> keyRangesFilter, Version restoreVersion,
std::string originalClusterFile, std::string restoreTimestamp, bool verbose) {
state UID operationId = deterministicRandom()->randomUniqueID();
state JsonBuilderObject result;
state std::string errorMessage;
result["key_ranges_filter"] = printable(keyRangesFilter);
result["destination_container"] = destinationContainer;
TraceEvent("BackupQueryStart")
.detail("OperationId", operationId)
.detail("DestinationContainer", destinationContainer)
.detail("KeyRangesFilter", printable(keyRangesFilter))
.detail("SpecifiedRestoreVersion", restoreVersion)
.detail("RestoreTimestamp", restoreTimestamp)
.detail("BackupClusterFile", originalClusterFile);
// Resolve restoreTimestamp if given
if (!restoreTimestamp.empty()) {
if (originalClusterFile.empty()) {
reportBackupQueryError(
operationId, result,
format("an original cluster file must be given in order to resolve restore target timestamp '%s'",
restoreTimestamp.c_str()));
return Void();
}
if (!fileExists(originalClusterFile)) {
reportBackupQueryError(operationId, result,
format("The specified original source database cluster file '%s' does not exist\n",
originalClusterFile.c_str()));
return Void();
}
Database origDb = Database::createDatabase(originalClusterFile, Database::API_VERSION_LATEST);
Version v = wait(timeKeeperVersionFromDatetime(restoreTimestamp, origDb));
result["restore_timestamp"] = restoreTimestamp;
result["restore_timestamp_resolved_version"] = v;
restoreVersion = v;
}
try {
state Reference<IBackupContainer> bc = openBackupContainer(name, destinationContainer);
if (restoreVersion == invalidVersion) {
BackupDescription desc = wait(bc->describeBackup());
if (desc.maxRestorableVersion.present()) {
restoreVersion = desc.maxRestorableVersion.get();
// Use continuous log end version for the maximum restorable version for the key ranges.
} else if (keyRangesFilter.size() && desc.contiguousLogEnd.present()) {
restoreVersion = desc.contiguousLogEnd.get();
} else {
reportBackupQueryError(
operationId, result,
errorMessage = format("the backup for the specified key ranges is not restorable to any version"));
}
}
if (restoreVersion < 0 && restoreVersion != latestVersion) {
reportBackupQueryError(operationId, result,
errorMessage =
format("the specified restorable version %ld is not valid", restoreVersion));
return Void();
}
Optional<RestorableFileSet> fileSet = wait(bc->getRestoreSet(restoreVersion, keyRangesFilter));
if (fileSet.present()) {
int64_t totalRangeFilesSize = 0, totalLogFilesSize = 0;
result["restore_version"] = fileSet.get().targetVersion;
JsonBuilderArray rangeFilesJson;
JsonBuilderArray logFilesJson;
for (const auto& rangeFile : fileSet.get().ranges) {
JsonBuilderObject object;
object["file_name"] = rangeFile.fileName;
object["file_size"] = rangeFile.fileSize;
object["version"] = rangeFile.version;
object["key_range"] = fileSet.get().keyRanges.count(rangeFile.fileName) == 0
? "none"
: fileSet.get().keyRanges.at(rangeFile.fileName).toString();
rangeFilesJson.push_back(object);
totalRangeFilesSize += rangeFile.fileSize;
}
for (const auto& log : fileSet.get().logs) {
JsonBuilderObject object;
object["file_name"] = log.fileName;
object["file_size"] = log.fileSize;
object["begin_version"] = log.beginVersion;
object["end_version"] = log.endVersion;
logFilesJson.push_back(object);
totalLogFilesSize += log.fileSize;
}
result["total_range_files_size"] = totalRangeFilesSize;
result["total_log_files_size"] = totalLogFilesSize;
if (verbose) {
result["ranges"] = rangeFilesJson;
result["logs"] = logFilesJson;
}
TraceEvent("BackupQueryReceivedRestorableFilesSet")
.detail("DestinationContainer", destinationContainer)
.detail("KeyRangesFilter", printable(keyRangesFilter))
.detail("ActualRestoreVersion", fileSet.get().targetVersion)
.detail("NumRangeFiles", fileSet.get().ranges.size())
.detail("NumLogFiles", fileSet.get().logs.size())
.detail("RangeFilesBytes", totalRangeFilesSize)
.detail("LogFilesBytes", totalLogFilesSize);
} else {
reportBackupQueryError(operationId, result, "no restorable files set found for specified key ranges");
return Void();
}
} catch (Error& e) {
reportBackupQueryError(operationId, result, e.what());
return Void();
}
printf("%s\n", result.getJson().c_str());
return Void();
}
ACTOR Future<Void> listBackup(std::string baseUrl) {
try {
std::vector<std::string> containers = wait(IBackupContainer::listContainers(baseUrl));
@ -2827,6 +3020,9 @@ int main(int argc, char* argv[]) {
case BACKUP_LIST:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupListOptions, SO_O_EXACT);
break;
case BACKUP_QUERY:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupQueryOptions, SO_O_EXACT);
break;
case BACKUP_MODIFY:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupModifyOptions, SO_O_EXACT);
break;
@ -2966,6 +3162,7 @@ int main(int argc, char* argv[]) {
std::string addPrefix;
std::string removePrefix;
Standalone<VectorRef<KeyRangeRef>> backupKeys;
Standalone<VectorRef<KeyRangeRef>> backupKeysFilter;
int maxErrors = 20;
Version beginVersion = invalidVersion;
Version restoreVersion = invalidVersion;
@ -3188,6 +3385,15 @@ int main(int argc, char* argv[]) {
return FDB_EXIT_ERROR;
}
break;
case OPT_BACKUPKEYS_FILTER:
try {
addKeyRange(args->OptionArg(), backupKeysFilter);
}
catch (Error &) {
printHelpTeaser(argv[0]);
return FDB_EXIT_ERROR;
}
break;
case OPT_DESTCONTAINER:
destinationContainer = args->OptionArg();
// If the url starts with '/' then prepend "file://" for backwards compatibility
@ -3727,6 +3933,12 @@ int main(int argc, char* argv[]) {
f = stopAfter( listBackup(baseUrl) );
break;
case BACKUP_QUERY:
initTraceFile();
f = stopAfter(queryBackup(argv[0], destinationContainer, backupKeysFilter, restoreVersion,
restoreClusterFileOrig, restoreTimestamp, !quietDisplay));
break;
case BACKUP_DUMP:
initTraceFile();
f = stopAfter( dumpBackupData(argv[0], destinationContainer, dumpBegin, dumpEnd) );

View File

@ -471,8 +471,8 @@ void initHelp() {
"All keys between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK);
helpMap["configure"] = CommandHelp(
"configure [new] "
"<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|commit_proxies=<"
"COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
"<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|proxies=<PROXIES>|"
"commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
"change the database configuration",
"The `new' option, if present, initializes a new database with the given configuration rather than changing "
"the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "
@ -480,8 +480,13 @@ void initHelp() {
"of data (survive one failure).\n triple - three copies of data (survive two failures).\n three_data_hall - "
"See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage "
"engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small "
"datasets.\n\ncommit_proxies=<COMMIT_PROXIES>: Sets the desired number of commit proxies in the cluster. Must "
"be at least 1, or set to -1 which restores the number of commit proxies to the default "
"datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. The proxy role is being "
"deprecated and split into GRV proxy and Commit proxy, now prefer configure 'grv_proxies' and 'commit_proxies' "
"separately. Generally we should follow that 'commit_proxies' is three times of 'grv_proxies' and 'grv_proxies' "
"should be not more than 4. If 'proxies' is specified, it will be converted to 'grv_proxies' and 'commit_proxies'. "
"Must be at least 2 (1 GRV proxy, 1 Commit proxy), or set to -1 which restores the number of proxies to the "
"default value.\n\ncommit_proxies=<COMMIT_PROXIES>: Sets the desired number of commit proxies in the cluster. "
"Must be at least 1, or set to -1 which restores the number of commit proxies to the default "
"value.\n\ngrv_proxies=<GRV_PROXIES>: Sets the desired number of GRV proxies in the cluster. Must be at least "
"1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=<LOGS>: Sets the "
"desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of "
@ -1058,10 +1063,10 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
if (statusObjConfig.has("regions")) {
outputString += "\n Regions: ";
regions = statusObjConfig["regions"].get_array();
bool isPrimary = false;
std::vector<std::string> regionSatelliteDCs;
std::string regionDC;
for (StatusObjectReader region : regions) {
bool isPrimary = false;
std::vector<std::string> regionSatelliteDCs;
std::string regionDC;
for (StatusObjectReader dc : region["datacenters"].get_array()) {
if (!dc.has("satellite")) {
regionDC = dc["id"].get_str();
@ -1807,7 +1812,7 @@ ACTOR Future<Void> commitTransaction( Reference<ReadYourWritesTransaction> tr )
}
ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Reference<ClusterConnectionFile> ccf, LineNoise* linenoise, Future<Void> warn ) {
state ConfigurationResult::Type result;
state ConfigurationResult result;
state int startToken = 1;
state bool force = false;
if (tokens.size() < 2)
@ -1888,7 +1893,8 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
}
}
ConfigurationResult::Type r = wait( makeInterruptable( changeConfig( db, std::vector<StringRef>(tokens.begin()+startToken,tokens.end()), conf, force) ) );
ConfigurationResult r = wait(makeInterruptable(
changeConfig(db, std::vector<StringRef>(tokens.begin() + startToken, tokens.end()), conf, force)));
result = r;
}
@ -2014,7 +2020,7 @@ ACTOR Future<bool> fileConfigure(Database db, std::string filePath, bool isNewDa
return true;
}
}
ConfigurationResult::Type result = wait( makeInterruptable( changeConfig(db, configString, force) ) );
ConfigurationResult result = wait(makeInterruptable(changeConfig(db, configString, force)));
// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
// there are various results specific to changeConfig() that we need to report:
bool ret;
@ -2145,7 +2151,7 @@ ACTOR Future<bool> coordinators( Database db, std::vector<StringRef> tokens, boo
}
if(setName.size()) change = nameQuorumChange( setName.toString(), change );
CoordinatorsResult::Type r = wait( makeInterruptable( changeQuorum( db, change ) ) );
CoordinatorsResult r = wait(makeInterruptable(changeQuorum(db, change)));
// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
// there are various results specific to changeConfig() that we need to report:

View File

@ -23,6 +23,7 @@
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/JsonBuilder.h"
#include "flow/Arena.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include "flow/Hash3.h"
@ -245,7 +246,7 @@ std::string BackupDescription::toJSON() const {
* file written will be after the start version of the snapshot's execution.
*
* Log files are at file paths like
* /plogs/...log,startVersion,endVersion,UID,tagID-of-N,blocksize
* /plogs/.../log,startVersion,endVersion,UID,tagID-of-N,blocksize
* /logs/.../log,startVersion,endVersion,UID,blockSize
* where ... is a multi level path which sorts lexically into version order and results in approximately 1
* unique folder per day containing about 5,000 files. Logs after FDB 6.3 are stored in "plogs"
@ -1403,8 +1404,15 @@ public:
}
ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet_impl(Reference<BackupContainerFileSystem> bc,
Version targetVersion, bool logsOnly,
Version beginVersion) {
Version targetVersion,
VectorRef<KeyRangeRef> keyRangesFilter, bool logsOnly = false,
Version beginVersion = invalidVersion) {
// Does not support use keyRangesFilter for logsOnly yet
if (logsOnly && !keyRangesFilter.empty()) {
TraceEvent(SevError, "BackupContainerRestoreSetUnsupportedAPI").detail("KeyRangesFilter", keyRangesFilter.size());
return Optional<RestorableFileSet>();
}
if (logsOnly) {
state RestorableFileSet restorableSet;
state std::vector<LogFile> logFiles;
@ -1416,23 +1424,55 @@ public:
return getRestoreSetFromLogs(logFiles, targetVersion, restorableSet);
}
}
// Find the most recent keyrange snapshot to end at or before targetVersion
state Optional<KeyspaceSnapshotFile> snapshot;
std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
for(auto const &s : snapshots) {
if(s.endVersion <= targetVersion)
snapshot = s;
}
if(snapshot.present()) {
// Find the most recent keyrange snapshot through which we can restore filtered key ranges into targetVersion.
state std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
state int i = snapshots.size() - 1;
for (; i >= 0; i--) {
// The smallest version of filtered range files >= snapshot beginVersion > targetVersion
if (targetVersion >= 0 && snapshots[i].beginVersion > targetVersion) {
continue;
}
state RestorableFileSet restorable;
restorable.snapshot = snapshot.get();
restorable.targetVersion = targetVersion;
state Version minKeyRangeVersion = MAX_VERSION;
state Version maxKeyRangeVersion = -1;
std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>> results =
wait(bc->readKeyspaceSnapshot(snapshot.get()));
restorable.ranges = std::move(results.first);
restorable.keyRanges = std::move(results.second);
wait(bc->readKeyspaceSnapshot(snapshots[i]));
// Old backup does not have metadata about key ranges and can not be filtered with key ranges.
if (keyRangesFilter.size() && results.second.empty() && !results.first.empty()) {
throw backup_not_filterable_with_key_ranges();
}
// Filter by keyRangesFilter.
if (keyRangesFilter.empty()) {
restorable.ranges = std::move(results.first);
restorable.keyRanges = std::move(results.second);
minKeyRangeVersion = snapshots[i].beginVersion;
maxKeyRangeVersion = snapshots[i].endVersion;
} else {
for (const auto& rangeFile : results.first) {
const auto& keyRange = results.second.at(rangeFile.fileName);
if (keyRange.intersects(keyRangesFilter)) {
restorable.ranges.push_back(rangeFile);
restorable.keyRanges[rangeFile.fileName] = keyRange;
minKeyRangeVersion = std::min(minKeyRangeVersion, rangeFile.version);
maxKeyRangeVersion = std::max(maxKeyRangeVersion, rangeFile.version);
}
}
// No range file matches 'keyRangesFilter'.
if (restorable.ranges.empty()) {
throw backup_not_overlapped_with_keys_filter();
}
}
// 'latestVersion' represents using the minimum restorable version in a snapshot.
restorable.targetVersion = targetVersion == latestVersion ? maxKeyRangeVersion : targetVersion;
// Any version < maxKeyRangeVersion is not restorable.
if (restorable.targetVersion < maxKeyRangeVersion) continue;
restorable.snapshot = snapshots[i];
// TODO: Reenable the sanity check after TooManyFiles error is resolved
if (false && g_network->isSimulated()) {
// Sanity check key ranges
@ -1446,18 +1486,21 @@ public:
}
}
// No logs needed if there is a complete key space snapshot at the target version.
if (snapshot.get().beginVersion == snapshot.get().endVersion &&
snapshot.get().endVersion == targetVersion) {
// No logs needed if there is a complete filtered key space snapshot at the target version.
if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) {
restorable.continuousBeginVersion = restorable.continuousEndVersion = invalidVersion;
TraceEvent("BackupContainerGetRestorableFilesWithoutLogs")
.detail("KeyRangeVersion", restorable.targetVersion)
.detail("NumberOfRangeFiles", restorable.ranges.size())
.detail("KeyRangesFilter", printable(keyRangesFilter));
return Optional<RestorableFileSet>(restorable);
}
// FIXME: check if there are tagged logs. for each tag, there is no version gap.
state std::vector<LogFile> logs;
state std::vector<LogFile> plogs;
wait(store(logs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, false)) &&
store(plogs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, true)));
wait(store(logs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, false)) &&
store(plogs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, true)));
if (plogs.size() > 0) {
logs.swap(plogs);
@ -1469,13 +1512,12 @@ public:
// Remove duplicated log files that can happen for old epochs.
std::vector<LogFile> filtered = filterDuplicates(logs);
restorable.logs.swap(filtered);
// sort by version order again for continuous analysis
std::sort(restorable.logs.begin(), restorable.logs.end());
if (isPartitionedLogsContinuous(restorable.logs, snapshot.get().beginVersion, targetVersion)) {
restorable.continuousBeginVersion = snapshot.get().beginVersion;
restorable.continuousEndVersion = targetVersion + 1; // not inclusive
if (isPartitionedLogsContinuous(restorable.logs, minKeyRangeVersion, restorable.targetVersion)) {
restorable.continuousBeginVersion = minKeyRangeVersion;
restorable.continuousEndVersion = restorable.targetVersion + 1; // not inclusive
return Optional<RestorableFileSet>(restorable);
}
return Optional<RestorableFileSet>();
@ -1483,20 +1525,19 @@ public:
// List logs in version order so log continuity can be analyzed
std::sort(logs.begin(), logs.end());
// If there are logs and the first one starts at or before the snapshot begin version then proceed
if(!logs.empty() && logs.front().beginVersion <= snapshot.get().beginVersion) {
// If there are logs and the first one starts at or before the keyrange's snapshot begin version, then
// it is valid restore set and proceed
if (!logs.empty() && logs.front().beginVersion <= minKeyRangeVersion) {
return getRestoreSetFromLogs(logs, targetVersion, restorable);
}
}
return Optional<RestorableFileSet>();
}
Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion, bool logsOnly,
Version beginVersion) final {
return getRestoreSet_impl(Reference<BackupContainerFileSystem>::addRef(this), targetVersion, logsOnly,
beginVersion);
Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion, VectorRef<KeyRangeRef> keyRangesFilter,
bool logsOnly, Version beginVersion) final {
return getRestoreSet_impl(Reference<BackupContainerFileSystem>::addRef(this), targetVersion, keyRangesFilter,
logsOnly, beginVersion);
}
private:

View File

@ -280,10 +280,13 @@ public:
virtual Future<BackupFileList> dumpFileList(Version begin = 0, Version end = std::numeric_limits<Version>::max()) = 0;
// Get exactly the files necessary to restore to targetVersion. Returns non-present if
// restore to given version is not possible.
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion, bool logsOnly = false,
Version beginVersion = -1) = 0;
// Get exactly the files necessary to restore the key space filtered by the specified key ranges to targetVersion.
// If targetVersion is 'latestVersion', use the minimum restorable version in a snapshot.
// If logsOnly is set, only use log files in [beginVersion, targetVervions) in restore set.
// Returns non-present if restoring to the given version is not possible.
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion,
VectorRef<KeyRangeRef> keyRangesFilter = {},
bool logsOnly = false, Version beginVersion = -1) = 0;
// Get an IBackupContainer based on a container spec string
static Reference<IBackupContainer> openContainer(std::string url);

View File

@ -133,15 +133,19 @@ struct DatabaseConfiguration {
}
//Killing an entire datacenter counts as killing one zone in modes that support it
int32_t maxZoneFailuresTolerated() const {
int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const {
int worstSatellite = regions.size() ? std::numeric_limits<int>::max() : 0;
int regionsWithNonNegativePriority = 0;
for(auto& r : regions) {
if(r.priority >= 0) {
regionsWithNonNegativePriority++;
}
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
if(r.satelliteTLogUsableDcsFallback > 0) {
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
}
}
if(usableRegions > 1 && worstSatellite > 0) {
if(usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 && (!forAvailability || regionsWithNonNegativePriority > 1)) {
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1);
} else if(worstSatellite > 0) {
return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, storageTeamSize - 1);

View File

@ -257,6 +257,7 @@ struct Traceable<std::set<T>> : std::true_type {
std::string printable( const StringRef& val );
std::string printable( const std::string& val );
std::string printable( const KeyRangeRef& range );
std::string printable(const VectorRef<KeyRangeRef>& val);
std::string printable( const VectorRef<StringRef>& val );
std::string printable( const VectorRef<KeyValueRef>& val );
std::string printable( const KeyValueRef& val );
@ -289,6 +290,14 @@ struct KeyRangeRef {
bool contains( const KeyRef& key ) const { return begin <= key && key < end; }
bool contains( const KeyRangeRef& keys ) const { return begin <= keys.begin && keys.end <= end; }
bool intersects( const KeyRangeRef& keys ) const { return begin < keys.end && keys.begin < end; }
bool intersects(const VectorRef<KeyRangeRef>& keysVec) const {
for (const auto& keys : keysVec) {
if (intersects(keys)) {
return true;
}
}
return false;
}
bool empty() const { return begin == end; }
bool singleKeyRange() const { return equalsKeyAfter(begin, end); }

View File

@ -3470,12 +3470,13 @@ namespace fileBackup {
if (beginVersion == invalidVersion) {
beginVersion = 0;
}
Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(restoreVersion, incremental, beginVersion));
if (!incremental) {
beginVersion = restorable.get().snapshot.beginVersion;
}
Optional<RestorableFileSet> restorable =
wait(bc->getRestoreSet(restoreVersion, VectorRef<KeyRangeRef>(), incremental, beginVersion));
if (!incremental) {
beginVersion = restorable.get().snapshot.beginVersion;
}
if(!restorable.present())
if(!restorable.present())
throw restore_missing_data();
// First version for which log data should be applied
@ -4519,7 +4520,7 @@ public:
}
Optional<RestorableFileSet> restoreSet =
wait(bc->getRestoreSet(targetVersion, incrementalBackupOnly, beginVersion));
wait(bc->getRestoreSet(targetVersion, VectorRef<KeyRangeRef>(), incrementalBackupOnly, beginVersion));
if(!restoreSet.present()) {
TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible")

View File

@ -104,7 +104,7 @@ void ClientKnobs::initialize(bool randomize) {
init( WATCH_POLLING_TIME, 1.0 ); if( randomize && BUGGIFY ) WATCH_POLLING_TIME = 5.0;
init( NO_RECENT_UPDATES_DURATION, 20.0 ); if( randomize && BUGGIFY ) NO_RECENT_UPDATES_DURATION = 0.1;
init( FAST_WATCH_TIMEOUT, 20.0 ); if( randomize && BUGGIFY ) FAST_WATCH_TIMEOUT = 1.0;
init( WATCH_TIMEOUT, 900.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;
init( WATCH_TIMEOUT, 30.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;
// Core
init( CORE_VERSIONSPERSECOND, 1e6 );

View File

@ -82,11 +82,17 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
std::string value = mode.substr(pos+1);
if (key == "proxies" && isInteger(value)) {
printf("\nWarning: Proxy role is being split into GRV Proxy and Commit Proxy, now prefer configuring "
"\"grv_proxies\" and \"commit_proxies\" separately.\n");
printf("Warning: Proxy role is being split into GRV Proxy and Commit Proxy, now prefer configuring "
"'grv_proxies' and 'commit_proxies' separately. Generally we should follow that 'commit_proxies'"
" is three times of 'grv_proxies' count and 'grv_proxies' should be not more than 4.\n");
int proxiesCount = atoi(value.c_str());
if (proxiesCount == -1) {
proxiesCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES + CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES;
ASSERT_WE_THINK(proxiesCount >= 2);
}
if (proxiesCount < 2) {
printf("Error: At least 2 proxies (1 GRV proxy and Commit proxy) are required.\n");
printf("Error: At least 2 proxies (1 GRV proxy and 1 Commit proxy) are required.\n");
return out;
}
@ -102,7 +108,8 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
grvProxyCount, commitProxyCount);
TraceEvent("DatabaseConfigurationProxiesSpecified")
.detail("SpecifiedProxies", grvProxyCount)
.detail("SpecifiedProxies", atoi(value.c_str()))
.detail("EffectiveSpecifiedProxies", proxiesCount)
.detail("ConvertedGrvProxies", grvProxyCount)
.detail("ConvertedCommitProxies", commitProxyCount);
}
@ -259,7 +266,8 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
return out;
}
ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& modeTokens, std::map<std::string, std::string>& outConf ) {
ConfigurationResult buildConfiguration(std::vector<StringRef> const& modeTokens,
std::map<std::string, std::string>& outConf) {
for(auto it : modeTokens) {
std::string mode = it.toString();
auto m = configForToken( mode );
@ -295,7 +303,7 @@ ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& mode
return ConfigurationResult::SUCCESS;
}
ConfigurationResult::Type buildConfiguration( std::string const& configMode, std::map<std::string, std::string>& outConf ) {
ConfigurationResult buildConfiguration(std::string const& configMode, std::map<std::string, std::string>& outConf) {
std::vector<StringRef> modes;
int p = 0;
@ -335,7 +343,7 @@ ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration( Database cx ) {
}
}
ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std::string, std::string> m, bool force ) {
ACTOR Future<ConfigurationResult> changeConfig(Database cx, std::map<std::string, std::string> m, bool force) {
state StringRef initIdKey = LiteralStringRef( "\xff/init_id" );
state Transaction tr(cx);
@ -852,7 +860,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
return result;
}
ACTOR Future<ConfigurationResult::Type> autoConfig( Database cx, ConfigureAutoResult conf ) {
ACTOR Future<ConfigurationResult> autoConfig(Database cx, ConfigureAutoResult conf) {
state Transaction tr(cx);
state Key versionKey = BinaryWriter::toValue(deterministicRandom()->randomUniqueID(),Unversioned());
@ -919,7 +927,8 @@ ACTOR Future<ConfigurationResult::Type> autoConfig( Database cx, ConfigureAutoRe
}
}
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<StringRef> const& modes, Optional<ConfigureAutoResult> const& conf, bool force ) {
Future<ConfigurationResult> changeConfig(Database const& cx, std::vector<StringRef> const& modes,
Optional<ConfigureAutoResult> const& conf, bool force) {
if( modes.size() && modes[0] == LiteralStringRef("auto") && conf.present() ) {
return autoConfig(cx, conf.get());
}
@ -931,7 +940,7 @@ Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<
return changeConfig(cx, m, force);
}
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::string const& modes, bool force ) {
Future<ConfigurationResult> changeConfig(Database const& cx, std::string const& modes, bool force) {
TraceEvent("ChangeConfig").detail("Mode", modes);
std::map<std::string,std::string> m;
auto r = buildConfiguration( modes, m );
@ -1000,7 +1009,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators( Database cx ) {
}
}
ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuorumChange> change ) {
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change) {
state Transaction tr(cx);
state int retries = 0;
state std::vector<NetworkAddress> desiredCoordinators;
@ -1020,7 +1029,7 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
if ( cx->getConnectionFile() && old.clusterKeyName().toString() != cx->getConnectionFile()->getConnectionString().clusterKeyName() )
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state CoordinatorsResult::Type result = CoordinatorsResult::SUCCESS;
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if(!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait( change->getDesiredCoordinators( &tr, old.coordinators(), Reference<ClusterConnectionFile>(new ClusterConnectionFile(old)), result ) );
desiredCoordinators = _desiredCoordinators;
@ -1090,14 +1099,20 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
struct SpecifiedQuorumChange : IQuorumChange {
vector<NetworkAddress> desired;
explicit SpecifiedQuorumChange( vector<NetworkAddress> const& desired ) : desired(desired) {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
CoordinatorsResult&) {
return desired;
}
};
Reference<IQuorumChange> specifiedQuorumChange(vector<NetworkAddress> const& addresses) { return Reference<IQuorumChange>(new SpecifiedQuorumChange(addresses)); }
struct NoQuorumChange : IQuorumChange {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
CoordinatorsResult&) {
return oldCoordinators;
}
};
@ -1107,7 +1122,10 @@ struct NameQuorumChange : IQuorumChange {
std::string newName;
Reference<IQuorumChange> otherChange;
explicit NameQuorumChange( std::string const& newName, Reference<IQuorumChange> const& otherChange ) : newName(newName), otherChange(otherChange) {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> cf, CoordinatorsResult::Type& t ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> cf,
CoordinatorsResult& t) {
return otherChange->getDesiredCoordinators(tr, oldCoordinators, cf, t);
}
virtual std::string getDesiredClusterKeyName() {
@ -1122,7 +1140,10 @@ struct AutoQuorumChange : IQuorumChange {
int desired;
explicit AutoQuorumChange( int desired ) : desired(desired) {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> ccf, CoordinatorsResult::Type& err ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> ccf,
CoordinatorsResult& err) {
return getDesired( this, tr, oldCoordinators, ccf, &err );
}
@ -1174,7 +1195,10 @@ struct AutoQuorumChange : IQuorumChange {
return true; // The status quo seems fine
}
ACTOR static Future<vector<NetworkAddress>> getDesired( AutoQuorumChange* self, Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> ccf, CoordinatorsResult::Type* err ) {
ACTOR static Future<vector<NetworkAddress>> getDesired(AutoQuorumChange* self, Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> ccf,
CoordinatorsResult* err) {
state int desiredCount = self->desired;
if(desiredCount == -1) {

View File

@ -43,41 +43,35 @@ standard API and some knowledge of the contents of the system key space.
// ConfigurationResult enumerates normal outcomes of changeConfig() and various error
// conditions specific to it. changeConfig may also throw an Error to report other problems.
class ConfigurationResult {
public:
enum Type {
NO_OPTIONS_PROVIDED,
CONFLICTING_OPTIONS,
UNKNOWN_OPTION,
INCOMPLETE_CONFIGURATION,
INVALID_CONFIGURATION,
DATABASE_ALREADY_CREATED,
DATABASE_CREATED,
DATABASE_UNAVAILABLE,
STORAGE_IN_UNKNOWN_DCID,
REGION_NOT_FULLY_REPLICATED,
MULTIPLE_ACTIVE_REGIONS,
REGIONS_CHANGED,
NOT_ENOUGH_WORKERS,
REGION_REPLICATION_MISMATCH,
DCID_MISSING,
LOCKED_NOT_NEW,
SUCCESS,
};
enum class ConfigurationResult {
NO_OPTIONS_PROVIDED,
CONFLICTING_OPTIONS,
UNKNOWN_OPTION,
INCOMPLETE_CONFIGURATION,
INVALID_CONFIGURATION,
DATABASE_ALREADY_CREATED,
DATABASE_CREATED,
DATABASE_UNAVAILABLE,
STORAGE_IN_UNKNOWN_DCID,
REGION_NOT_FULLY_REPLICATED,
MULTIPLE_ACTIVE_REGIONS,
REGIONS_CHANGED,
NOT_ENOUGH_WORKERS,
REGION_REPLICATION_MISMATCH,
DCID_MISSING,
LOCKED_NOT_NEW,
SUCCESS,
};
class CoordinatorsResult {
public:
enum Type {
INVALID_NETWORK_ADDRESSES,
SAME_NETWORK_ADDRESSES,
NOT_COORDINATORS, //FIXME: not detected
DATABASE_UNREACHABLE, //FIXME: not detected
BAD_DATABASE_STATE,
COORDINATOR_UNREACHABLE,
NOT_ENOUGH_MACHINES,
SUCCESS
};
enum class CoordinatorsResult {
INVALID_NETWORK_ADDRESSES,
SAME_NETWORK_ADDRESSES,
NOT_COORDINATORS, // FIXME: not detected
DATABASE_UNREACHABLE, // FIXME: not detected
BAD_DATABASE_STATE,
COORDINATOR_UNREACHABLE,
NOT_ENOUGH_MACHINES,
SUCCESS
};
struct ConfigureAutoResult {
@ -116,17 +110,24 @@ struct ConfigureAutoResult {
bool isValid() const { return processes != -1; }
};
ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& modeTokens, std::map<std::string, std::string>& outConf ); // Accepts a vector of configuration tokens
ConfigurationResult::Type buildConfiguration( std::string const& modeString, std::map<std::string, std::string>& outConf ); // Accepts tokens separated by spaces in a single string
ConfigurationResult buildConfiguration(
std::vector<StringRef> const& modeTokens,
std::map<std::string, std::string>& outConf); // Accepts a vector of configuration tokens
ConfigurationResult buildConfiguration(
std::string const& modeString,
std::map<std::string, std::string>& outConf); // Accepts tokens separated by spaces in a single string
bool isCompleteConfiguration( std::map<std::string, std::string> const& options );
// All versions of changeConfig apply the given set of configuration tokens to the database, and return a ConfigurationResult (or error).
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::string const& configMode, bool force ); // Accepts tokens separated by spaces in a single string
Future<ConfigurationResult> changeConfig(Database const& cx, std::string const& configMode,
bool force); // Accepts tokens separated by spaces in a single string
ConfigureAutoResult parseConfig( StatusObject const& status );
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<StringRef> const& modes, Optional<ConfigureAutoResult> const& conf, bool force ); // Accepts a vector of configuration tokens
ACTOR Future<ConfigurationResult::Type> changeConfig(
Future<ConfigurationResult> changeConfig(Database const& cx, std::vector<StringRef> const& modes,
Optional<ConfigureAutoResult> const& conf,
bool force); // Accepts a vector of configuration tokens
ACTOR Future<ConfigurationResult> changeConfig(
Database cx, std::map<std::string, std::string> m,
bool force); // Accepts a full configuration in key/value format (from buildConfiguration)
@ -135,12 +136,15 @@ ACTOR Future<Void> waitForFullReplication(Database cx);
struct IQuorumChange : ReferenceCounted<IQuorumChange> {
virtual ~IQuorumChange() {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) = 0;
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
CoordinatorsResult&) = 0;
virtual std::string getDesiredClusterKeyName() { return std::string(); }
};
// Change to use the given set of coordination servers
ACTOR Future<CoordinatorsResult::Type> changeQuorum(Database cx, Reference<IQuorumChange> change);
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
Reference<IQuorumChange> autoQuorumChange(int desired = -1);
Reference<IQuorumChange> noQuorumChange();
Reference<IQuorumChange> specifiedQuorumChange(vector<NetworkAddress> const&);

View File

@ -189,6 +189,12 @@ std::string printable( const KeyRangeRef& range ) {
return printable(range.begin) + " - " + printable(range.end);
}
std::string printable(const VectorRef<KeyRangeRef>& val) {
std::string s;
for (int i = 0; i < val.size(); i++) s = s + printable(val[i]) + " ";
return s;
}
int unhex( char c ) {
if (c >= '0' && c <= '9')
return c-'0';
@ -3896,12 +3902,14 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan, Da
TransactionPriority priority, uint32_t flags,
TransactionTagMap<uint32_t> tags, Optional<UID> debugID) {
state Span span("NAPI:getConsistentReadVersion"_loc, parentSpan);
try {
++cx->transactionReadVersionBatches;
if( debugID.present() )
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
loop {
++cx->transactionReadVersionBatches;
if( debugID.present() )
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
loop {
try {
state GetReadVersionRequest req( span.context, transactionCount, priority, flags, tags, debugID );
choose {
when ( wait( cx->onProxiesChanged() ) ) {}
when ( GetReadVersionReply v = wait( basicLoadBalance( cx->getGrvProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES), &GrvProxyInterface::getConsistentReadVersion, req, cx->taskID ) ) ) {
@ -3930,12 +3938,17 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan, Da
return v;
}
}
} catch (Error& e) {
if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
if(e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) {
wait(delayJittered(5.0));
} else {
throw;
}
}
} catch (Error& e) {
if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
throw;
}
}
ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream<DatabaseContext::VersionRequest> versionStream, TransactionPriority priority, uint32_t flags ) {

View File

@ -54,6 +54,7 @@ struct RestoreSysInfo;
struct RestoreApplierInterface;
struct RestoreFinishRequest;
struct RestoreSamplesRequest;
struct RestoreUpdateRateRequest;
// RestoreSysInfo includes information each (type of) restore roles should know.
// At this moment, it only include appliers. We keep the name for future extension.
@ -174,6 +175,7 @@ struct RestoreApplierInterface : RestoreRoleInterface {
RequestStream<RestoreVersionBatchRequest> initVersionBatch;
RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces;
RequestStream<RestoreFinishRequest> finishRestore;
RequestStream<RestoreUpdateRateRequest> updateRate;
bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); }
bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); }
@ -193,12 +195,13 @@ struct RestoreApplierInterface : RestoreRoleInterface {
initVersionBatch.getEndpoint(TaskPriority::LoadBalancedEndpoint);
collectRestoreRoleInterfaces.getEndpoint(TaskPriority::LoadBalancedEndpoint);
finishRestore.getEndpoint(TaskPriority::LoadBalancedEndpoint);
updateRate.getEndpoint(TaskPriority::LoadBalancedEndpoint);
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, *(RestoreRoleInterface*)this, heartbeat, sendMutationVector, applyToDB, initVersionBatch,
collectRestoreRoleInterfaces, finishRestore);
collectRestoreRoleInterfaces, finishRestore, updateRate);
}
std::string toString() const { return nodeID.toString(); }
@ -616,6 +619,50 @@ struct RestoreFinishRequest : TimedRequest {
}
};
struct RestoreUpdateRateReply : TimedRequest {
constexpr static FileIdentifier file_identifier = 13018414;
UID id;
double remainMB; // remaining data in MB to write to DB;
RestoreUpdateRateReply() = default;
explicit RestoreUpdateRateReply(UID id, double remainMB) : id(id), remainMB(remainMB) {}
std::string toString() const {
std::stringstream ss;
ss << "RestoreUpdateRateReply NodeID:" << id.toString() << " remainMB:" << remainMB;
return ss.str();
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, id, remainMB);
}
};
struct RestoreUpdateRateRequest : TimedRequest {
constexpr static FileIdentifier file_identifier = 13018415;
int batchIndex;
double writeMB;
ReplyPromise<RestoreUpdateRateReply> reply;
RestoreUpdateRateRequest() = default;
explicit RestoreUpdateRateRequest(int batchIndex, double writeMB) : batchIndex(batchIndex), writeMB(writeMB) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, batchIndex, writeMB, reply);
}
std::string toString() const {
std::stringstream ss;
ss << "RestoreUpdateRateRequest batchIndex:" << batchIndex << " writeMB:" << writeMB;
return ss.str();
}
};
struct RestoreRequest {
constexpr static FileIdentifier file_identifier = 16035338;

View File

@ -156,7 +156,7 @@ Future<Void> SimpleFailureMonitor::onStateChanged(Endpoint const& endpoint) {
return endpointKnownFailed.onChange(endpoint);
}
FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) {
FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) const {
if (failedEndpoints.count(endpoint))
return FailureStatus(true);
else {
@ -170,7 +170,7 @@ FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) {
}
}
FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) {
FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) const {
auto a = addressStatus.find(address);
if (a == addressStatus.end())
return FailureStatus();
@ -178,7 +178,7 @@ FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) {
return a->second;
}
bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) {
bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) const {
if (!failedEndpoints.count(endpoint)) return false;
auto a = addressStatus.find(endpoint.getPrimaryAddress());
if (a == addressStatus.end())
@ -187,7 +187,7 @@ bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) {
return !a->second.failed;
}
bool SimpleFailureMonitor::permanentlyFailed(Endpoint const& endpoint) {
bool SimpleFailureMonitor::permanentlyFailed(Endpoint const& endpoint) const {
return failedEndpoints.count(endpoint);
}

View File

@ -87,10 +87,10 @@ struct FailureStatus {
class IFailureMonitor {
public:
// Returns the currently known status for the endpoint
virtual FailureStatus getState(Endpoint const& endpoint) = 0;
virtual FailureStatus getState(Endpoint const& endpoint) const = 0;
// Returns the currently known status for the address
virtual FailureStatus getState(NetworkAddress const& address) = 0;
virtual FailureStatus getState(NetworkAddress const& address) const = 0;
// Only use this function when the endpoint is known to be failed
virtual void endpointNotFound(Endpoint const&) = 0;
@ -102,10 +102,10 @@ public:
virtual Future<Void> onDisconnectOrFailure(Endpoint const& endpoint) = 0;
// Returns true if the endpoint is failed but the address of the endpoint is not failed.
virtual bool onlyEndpointFailed(Endpoint const& endpoint) = 0;
virtual bool onlyEndpointFailed(Endpoint const& endpoint) const = 0;
// Returns true if the endpoint will never become available.
virtual bool permanentlyFailed(Endpoint const& endpoint) = 0;
virtual bool permanentlyFailed(Endpoint const& endpoint) const = 0;
// Called by FlowTransport when a connection closes and a prior request or reply might be lost
virtual void notifyDisconnect(NetworkAddress const&) = 0;
@ -140,14 +140,14 @@ public:
SimpleFailureMonitor();
void setStatus(NetworkAddress const& address, FailureStatus const& status);
void endpointNotFound(Endpoint const&);
virtual void notifyDisconnect(NetworkAddress const&);
void notifyDisconnect(NetworkAddress const&) override;
virtual Future<Void> onStateChanged(Endpoint const& endpoint);
virtual FailureStatus getState(Endpoint const& endpoint);
virtual FailureStatus getState(NetworkAddress const& address);
virtual Future<Void> onDisconnectOrFailure(Endpoint const& endpoint);
virtual bool onlyEndpointFailed(Endpoint const& endpoint);
virtual bool permanentlyFailed(Endpoint const& endpoint);
Future<Void> onStateChanged(Endpoint const& endpoint) override;
FailureStatus getState(Endpoint const& endpoint) const override;
FailureStatus getState(NetworkAddress const& address) const override;
Future<Void> onDisconnectOrFailure(Endpoint const& endpoint) override;
bool onlyEndpointFailed(Endpoint const& endpoint) const override;
bool permanentlyFailed(Endpoint const& endpoint) const override;
void reset();

View File

@ -78,6 +78,11 @@ public:
else if (s=="transaction") _class = TransactionClass;
else if (s=="resolution") _class = ResolutionClass;
else if (s=="commit_proxy") _class = CommitProxyClass;
else if (s=="proxy") {
_class = CommitProxyClass;
printf("WARNING: 'proxy' machine class is deprecated and will be automatically converted "
"'commit_proxy' machine class. Please use 'grv_proxy' or 'commit_proxy' specifically\n");
}
else if (s=="grv_proxy") _class = GrvProxyClass;
else if (s=="master") _class = MasterClass;
else if (s=="test") _class = TesterClass;
@ -100,6 +105,11 @@ public:
else if (classStr=="transaction") _class = TransactionClass;
else if (classStr=="resolution") _class = ResolutionClass;
else if (classStr=="commit_proxy") _class = CommitProxyClass;
else if (classStr=="proxy") {
_class = CommitProxyClass;
printf("WARNING: 'proxy' machine class is deprecated and will be automatically converted "
"'commit_proxy' machine class. Please use 'grv_proxy' or 'commit_proxy' specifically\n");
}
else if (classStr=="grv_proxy") _class = GrvProxyClass;
else if (classStr=="master") _class = MasterClass;
else if (classStr=="test") _class = TesterClass;

View File

@ -63,6 +63,8 @@ public:
virtual void enableSnapshot() {}
virtual bool canPipelineCommits() const = 0;
/*
Concurrency contract
Causal consistency:

View File

@ -35,6 +35,7 @@ struct KeyValueStoreCompressTestData : IKeyValueStore {
KeyValueStoreCompressTestData(IKeyValueStore* store) : store(store) {}
virtual bool canPipelineCommits() const override {return false;}
virtual Future<Void> getError() override { return store->getError(); }
virtual Future<Void> onClosed() override { return store->onClosed(); }
virtual void dispose() override {

View File

@ -63,6 +63,8 @@ public:
// IKeyValueStore
virtual KeyValueStoreType getType() const override { return type; }
virtual bool canPipelineCommits() const override { return false; }
virtual std::tuple<size_t, size_t, size_t> getSize() const override { return data.size(); }
int64_t getAvailableSize() const {

View File

@ -287,6 +287,8 @@ struct RocksDBKeyValueStore : IKeyValueStore {
return errorPromise.getFuture();
}
bool canPipelineCommits() const override { return true; }
ACTOR static void doClose(RocksDBKeyValueStore* self, bool deleteOnClose) {
wait(self->readThreads->stop());
auto a = new Writer::CloseAction(self->path, deleteOnClose);

View File

@ -1453,6 +1453,7 @@ public:
virtual KeyValueStoreType getType() const override { return type; }
virtual StorageBytes getStorageBytes() const override;
virtual bool canPipelineCommits() const override { return false; }
virtual void set(KeyValueRef keyValue, const Arena* arena = nullptr) override;
virtual void clear(KeyRangeRef range, const Arena* arena = nullptr) override;

View File

@ -46,7 +46,6 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( RECOVERY_TLOG_SMART_QUORUM_DELAY, 0.25 ); if( randomize && BUGGIFY ) RECOVERY_TLOG_SMART_QUORUM_DELAY = 0.0; // smaller might be better for bug amplification
init( TLOG_STORAGE_MIN_UPDATE_INTERVAL, 0.5 );
init( BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL, 30 );
init( UNFLUSHED_DATA_RATIO, 0.05 ); if( randomize && BUGGIFY ) UNFLUSHED_DATA_RATIO = 0.0;
init( DESIRED_TOTAL_BYTES, 150000 ); if( randomize && BUGGIFY ) DESIRED_TOTAL_BYTES = 10000;
init( DESIRED_UPDATE_BYTES, 2*DESIRED_TOTAL_BYTES );
init( UPDATE_DELAY, 0.001 );
@ -549,6 +548,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( FETCH_KEYS_LOWER_PRIORITY, 0 );
init( BUGGIFY_BLOCK_BYTES, 10000 );
init( STORAGE_COMMIT_BYTES, 10000000 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000;
init( STORAGE_COMMIT_PIPELINE_BYTES_PER_YIELD, 100000 );
init( STORAGE_DURABILITY_LAG_REJECT_THRESHOLD, 0.25 );
init( STORAGE_DURABILITY_LAG_MIN_RATE, 0.1 );
init( STORAGE_COMMIT_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_INTERVAL = 2.0;
@ -623,7 +623,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( FASTRESTORE_NUM_LOADERS, 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_LOADERS = deterministicRandom()->random01() * 10 + 1; }
init( FASTRESTORE_NUM_APPLIERS, 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_APPLIERS = deterministicRandom()->random01() * 10 + 1; }
init( FASTRESTORE_TXN_BATCH_MAX_BYTES, 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_BATCH_MAX_BYTES = deterministicRandom()->random01() * 1024.0 * 1024.0 + 1.0; }
init( FASTRESTORE_VERSIONBATCH_MAX_BYTES, 10.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VERSIONBATCH_MAX_BYTES = deterministicRandom()->random01() < 0.2 ? 5 * 1024 : deterministicRandom()->random01() < 0.4 ? 100 * 1024 * 1024 : deterministicRandom()->random01() * 1000.0 * 1024.0 * 1024.0; } // too small value may increase chance of TooManyFile error
init( FASTRESTORE_VERSIONBATCH_MAX_BYTES, 10.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VERSIONBATCH_MAX_BYTES = deterministicRandom()->random01() < 0.2 ? 10 * 1024 : deterministicRandom()->random01() < 0.4 ? 100 * 1024 * 1024 : deterministicRandom()->random01() * 1000.0 * 1024.0 * 1024.0; } // too small value may increase chance of TooManyFile error
init( FASTRESTORE_VB_PARALLELISM, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_PARALLELISM = deterministicRandom()->random01() < 0.2 ? 2 : deterministicRandom()->random01() * 10 + 1; }
init( FASTRESTORE_VB_MONITOR_DELAY, 30 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_MONITOR_DELAY = deterministicRandom()->random01() * 20 + 1; }
init( FASTRESTORE_VB_LAUNCH_DELAY, 1.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_LAUNCH_DELAY = deterministicRandom()->random01() < 0.2 ? 0.1 : deterministicRandom()->random01() * 10.0 + 1; }
@ -646,7 +646,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( FASTRESTORE_REQBATCH_LOG, false ); if( randomize && BUGGIFY ) { FASTRESTORE_REQBATCH_LOG = deterministicRandom()->random01() < 0.2 ? true : false; }
init( FASTRESTORE_TXN_CLEAR_MAX, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_CLEAR_MAX = deterministicRandom()->random01() * 100 + 1; }
init( FASTRESTORE_TXN_RETRY_MAX, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_RETRY_MAX = deterministicRandom()->random01() * 100 + 1; }
init( FASTRESTORE_TXN_EXTRA_DELAY, 0.1 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_EXTRA_DELAY = deterministicRandom()->random01() * 1 + 0.001;}
init( FASTRESTORE_TXN_EXTRA_DELAY, 0.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_EXTRA_DELAY = deterministicRandom()->random01() * 1 + 0.001;}
init( FASTRESTORE_NOT_WRITE_DB, false ); // Perf test only: set it to true will cause simulation failure
init( FASTRESTORE_USE_RANGE_FILE, true ); // Perf test only: set it to false will cause simulation failure
init( FASTRESTORE_USE_LOG_FILE, true ); // Perf test only: set it to false will cause simulation failure
@ -661,7 +661,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH, 2 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 15 + 1;}
init( FASTRESTORE_NUM_TRACE_EVENTS, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_TRACE_EVENTS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 500 + 1;}
init( FASTRESTORE_EXPENSIVE_VALIDATION, false ); if( randomize && BUGGIFY ) { FASTRESTORE_EXPENSIVE_VALIDATION = deterministicRandom()->random01() < 0.5 ? true : false;}
init( FASTRESTORE_WRITE_BW_MB, 70 ); if( randomize && BUGGIFY ) { FASTRESTORE_WRITE_BW_MB = deterministicRandom()->random01() < 0.5 ? 2 : 100;}
init( FASTRESTORE_RATE_UPDATE_SECONDS, 1.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_RATE_UPDATE_SECONDS = deterministicRandom()->random01() < 0.5 ? 0.1 : 2;}
init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 );
init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 );

View File

@ -45,7 +45,6 @@ public:
double RECOVERY_TLOG_SMART_QUORUM_DELAY; // smaller might be better for bug amplification
double TLOG_STORAGE_MIN_UPDATE_INTERVAL;
double BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL;
double UNFLUSHED_DATA_RATIO;
int DESIRED_TOTAL_BYTES;
int DESIRED_UPDATE_BYTES;
double UPDATE_DELAY;
@ -482,6 +481,7 @@ public:
double STORAGE_DURABILITY_LAG_MIN_RATE;
int STORAGE_COMMIT_BYTES;
double STORAGE_COMMIT_INTERVAL;
int STORAGE_COMMIT_PIPELINE_BYTES_PER_YIELD;
double UPDATE_SHARD_VERSION_INTERVAL;
int BYTE_SAMPLING_FACTOR;
int BYTE_SAMPLING_OVERHEAD;
@ -547,6 +547,7 @@ public:
int64_t TIME_KEEPER_MAX_ENTRIES;
// Fast Restore
// TODO: After 6.3, review FR knobs, remove unneeded ones and change default value
int64_t FASTRESTORE_FAILURE_TIMEOUT;
int64_t FASTRESTORE_HEARTBEAT_INTERVAL;
double FASTRESTORE_SAMPLING_PERCENT;
@ -594,6 +595,8 @@ public:
int FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH; // number of future VB sendLoadingParam requests to process at once
int FASTRESTORE_NUM_TRACE_EVENTS;
bool FASTRESTORE_EXPENSIVE_VALIDATION; // when set true, performance will be heavily affected
double FASTRESTORE_WRITE_BW_MB; // target aggregated write bandwidth from all appliers
double FASTRESTORE_RATE_UPDATE_SECONDS; // how long to update appliers target write rate
int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files
int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress.

View File

@ -40,6 +40,7 @@ ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMu
Reference<RestoreApplierData> self);
ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req, Reference<RestoreApplierData> self,
Database cx);
void handleUpdateRateRequest(RestoreUpdateRateRequest req, Reference<RestoreApplierData> self);
ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int nodeIndex, Database cx) {
state Reference<RestoreApplierData> self =
@ -71,6 +72,10 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
req, self, cx)); // TODO: Check how FDB uses TaskPriority for ACTORS. We may need to add
// priority here to avoid requests at later VB block requests at earlier VBs
}
when(RestoreUpdateRateRequest req = waitNext(applierInterf.updateRate.getFuture())) {
requestTypeStr = "updateRate";
handleUpdateRateRequest(req, self);
}
when(RestoreVersionBatchRequest req = waitNext(applierInterf.initVersionBatch.getFuture())) {
requestTypeStr = "initVersionBatch";
actors.add(handleInitVersionBatchRequest(req, self));
@ -218,6 +223,7 @@ ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRange
loop {
try {
// TODO: Consider clearrange traffic in write traffic control
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
for (auto& range : ranges) {
@ -463,31 +469,55 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
return Void();
}
bool okToReleaseTxns(double targetMB, double applyingDataBytes) {
return applyingDataBytes < targetMB * 1024 * 1024;
}
ACTOR static Future<Void> shouldReleaseTransaction(double* targetMB, double* applyingDataBytes,
AsyncTrigger* releaseTxns) {
loop {
if (okToReleaseTxns(*targetMB, *applyingDataBytes)) {
break;
} else {
wait(releaseTxns->onTrigger());
wait(delay(0.0)); // Avoid all waiting txns are triggered at the same time and all decide to proceed before
// applyingDataBytes has a chance to update
}
}
return Void();
}
// Apply mutations in batchData->stagingKeys [begin, end).
ACTOR static Future<Void> applyStagingKeysBatch(std::map<Key, StagingKey>::iterator begin,
std::map<Key, StagingKey>::iterator end, Database cx,
FlowLock* applyStagingKeysBatchLock, UID applierID,
ApplierBatchData::Counters* cc) {
std::map<Key, StagingKey>::iterator end, Database cx, UID applierID,
ApplierBatchData::Counters* cc, double* appliedBytes,
double* applyingDataBytes, double* targetMB,
AsyncTrigger* releaseTxnTrigger) {
if (SERVER_KNOBS->FASTRESTORE_NOT_WRITE_DB) {
TraceEvent("FastRestoreApplierPhaseApplyStagingKeysBatchSkipped", applierID).detail("Begin", begin->first);
ASSERT(!g_network->isSimulated());
return Void();
}
wait(applyStagingKeysBatchLock->take(TaskPriority::RestoreApplierWriteDB)); // Q: Do we really need the lock?
state FlowLock::Releaser releaser(*applyStagingKeysBatchLock);
wait(shouldReleaseTransaction(targetMB, applyingDataBytes, releaseTxnTrigger));
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
state int sets = 0;
state int clears = 0;
state Key endKey = begin->first;
state double txnSize = 0;
state double txnSizeUsed = 0; // txn size accounted in applyingDataBytes
TraceEvent(SevFRDebugInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID).detail("Begin", begin->first);
loop {
try {
txnSize = 0;
txnSizeUsed = 0;
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
std::map<Key, StagingKey>::iterator iter = begin;
while (iter != end) {
if (iter->second.type == MutationRef::SetValue) {
tr->set(iter->second.key, iter->second.val);
txnSize += iter->second.totalSize();
cc->appliedMutations += 1;
TraceEvent(SevFRMutationInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID)
.detail("SetKey", iter->second.key);
@ -501,6 +531,7 @@ ACTOR static Future<Void> applyStagingKeysBatch(std::map<Key, StagingKey>::itera
.detail("SubVersion", iter->second.version.sub);
}
tr->clear(singleKeyRange(iter->second.key));
txnSize += iter->second.totalSize();
cc->appliedMutations += 1;
TraceEvent(SevFRMutationInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID)
.detail("ClearKey", iter->second.key);
@ -523,12 +554,21 @@ ACTOR static Future<Void> applyStagingKeysBatch(std::map<Key, StagingKey>::itera
.detail("Sets", sets)
.detail("Clears", clears);
tr->addWriteConflictRange(KeyRangeRef(begin->first, keyAfter(endKey))); // Reduce resolver load
txnSizeUsed = txnSize;
*applyingDataBytes += txnSizeUsed; // Must account for applying bytes before wait for write traffic control
wait(tr->commit());
cc->appliedTxns += 1;
cc->appliedBytes += txnSize;
*appliedBytes += txnSize;
*applyingDataBytes -= txnSizeUsed;
if (okToReleaseTxns(*targetMB, *applyingDataBytes)) {
releaseTxnTrigger->trigger();
}
break;
} catch (Error& e) {
cc->appliedTxnRetries += 1;
wait(tr->onError(e));
*applyingDataBytes -= txnSizeUsed;
}
}
return Void();
@ -545,13 +585,14 @@ ACTOR static Future<Void> applyStagingKeys(Reference<ApplierBatchData> batchData
TraceEvent("FastRestoreApplerPhaseApplyStagingKeysStart", applierID)
.detail("BatchIndex", batchIndex)
.detail("StagingKeys", batchData->stagingKeys.size());
batchData->totalBytesToWrite = 0;
while (cur != batchData->stagingKeys.end()) {
txnSize += cur->second.expectedMutationSize();
txnSize += cur->second.totalSize(); // should be consistent with receivedBytes accounting method
if (txnSize > SERVER_KNOBS->FASTRESTORE_TXN_BATCH_MAX_BYTES) {
fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, &batchData->applyStagingKeysBatchLock, applierID,
&batchData->counters));
batchData->counters.appliedBytes += txnSize;
batchData->appliedBytes += txnSize;
fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, applierID, &batchData->counters,
&batchData->appliedBytes, &batchData->applyingDataBytes,
&batchData->targetWriteRateMB, &batchData->releaseTxnTrigger));
batchData->totalBytesToWrite += txnSize;
begin = cur;
txnSize = 0;
txnBatches++;
@ -559,10 +600,10 @@ ACTOR static Future<Void> applyStagingKeys(Reference<ApplierBatchData> batchData
cur++;
}
if (begin != batchData->stagingKeys.end()) {
fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, &batchData->applyStagingKeysBatchLock, applierID,
&batchData->counters));
batchData->counters.appliedBytes += txnSize;
batchData->appliedBytes += txnSize;
fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, applierID, &batchData->counters,
&batchData->appliedBytes, &batchData->applyingDataBytes,
&batchData->targetWriteRateMB, &batchData->releaseTxnTrigger));
batchData->totalBytesToWrite += txnSize;
txnBatches++;
}
@ -571,18 +612,19 @@ ACTOR static Future<Void> applyStagingKeys(Reference<ApplierBatchData> batchData
TraceEvent("FastRestoreApplerPhaseApplyStagingKeysDone", applierID)
.detail("BatchIndex", batchIndex)
.detail("StagingKeys", batchData->stagingKeys.size())
.detail("TransactionBatches", txnBatches);
.detail("TransactionBatches", txnBatches)
.detail("TotalBytesToWrite", batchData->totalBytesToWrite);
return Void();
}
// Write mutations to the destination DB
ACTOR Future<Void> writeMutationsToDB(UID applierID, int64_t batchIndex, Reference<ApplierBatchData> batchData,
Database cx) {
TraceEvent("FastRestoreApplerPhaseApplyTxnStart", applierID).detail("BatchIndex", batchIndex);
TraceEvent("FastRestoreApplierPhaseApplyTxnStart", applierID).detail("BatchIndex", batchIndex);
wait(precomputeMutationsResult(batchData, applierID, batchIndex, cx));
wait(applyStagingKeys(batchData, applierID, batchIndex, cx));
TraceEvent("FastRestoreApplerPhaseApplyTxnDone", applierID)
TraceEvent("FastRestoreApplierPhaseApplyTxnDone", applierID)
.detail("BatchIndex", batchIndex)
.detail("AppliedBytes", batchData->appliedBytes)
.detail("ReceivedBytes", batchData->receivedBytes);
@ -590,6 +632,55 @@ ACTOR Future<Void> writeMutationsToDB(UID applierID, int64_t batchIndex, Referen
return Void();
}
void handleUpdateRateRequest(RestoreUpdateRateRequest req, Reference<RestoreApplierData> self) {
TraceEvent ev("FastRestoreApplierUpdateRateRequest", self->id());
ev.suppressFor(10)
.detail("BatchIndex", req.batchIndex)
.detail("FinishedBatch", self->finishedBatch.get())
.detail("WriteMB", req.writeMB);
double remainingDataMB = 0;
if (self->finishedBatch.get() == req.batchIndex - 1) { // current applying batch
Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
ASSERT(batchData.isValid());
batchData->targetWriteRateMB = req.writeMB;
remainingDataMB = batchData->totalBytesToWrite > 0
? std::max(0.0, batchData->totalBytesToWrite - batchData->appliedBytes) / 1024 / 1024
: batchData->receivedBytes / 1024 / 1024;
ev.detail("TotalBytesToWrite", batchData->totalBytesToWrite)
.detail("AppliedBytes", batchData->appliedBytes)
.detail("ReceivedBytes", batchData->receivedBytes)
.detail("TargetWriteRateMB", batchData->targetWriteRateMB)
.detail("RemainingDataMB", remainingDataMB);
}
req.reply.send(RestoreUpdateRateReply(self->id(), remainingDataMB));
return;
}
ACTOR static Future<Void> traceRate(const char* context, Reference<ApplierBatchData> batchData, int batchIndex,
UID nodeID, NotifiedVersion* finishedVB, bool once = false) {
loop {
if ((finishedVB->get() != batchIndex - 1) || !batchData.isValid()) {
break;
}
TraceEvent(context, nodeID)
.suppressFor(10)
.detail("BatchIndex", batchIndex)
.detail("FinishedBatchIndex", finishedVB->get())
.detail("TotalDataToWriteMB", batchData->totalBytesToWrite / 1024 / 1024)
.detail("AppliedBytesMB", batchData->appliedBytes / 1024 / 1024)
.detail("TargetBytesMB", batchData->targetWriteRateMB)
.detail("InflightBytesMB", batchData->applyingDataBytes)
.detail("ReceivedBytes", batchData->receivedBytes);
if (once) {
break;
}
wait(delay(5.0));
}
return Void();
}
ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req, Reference<RestoreApplierData> self,
Database cx) {
TraceEvent("FastRestoreApplierPhaseHandleApplyToDBStart", self->id())
@ -601,9 +692,9 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1));
state bool isDuplicated = true;
if (self->finishedBatch.get() ==
req.batchIndex - 1) { // duplicate request from earlier version batch will be ignored
Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
if (self->finishedBatch.get() == req.batchIndex - 1) {
// duplicate request from earlier version batch will be ignored
state Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
ASSERT(batchData.isValid());
TraceEvent("FastRestoreApplierPhaseHandleApplyToDBRunning", self->id())
.detail("BatchIndex", req.batchIndex)
@ -618,6 +709,8 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
batchData->dbApplier = Never();
batchData->dbApplier = writeMutationsToDB(self->id(), req.batchIndex, batchData, cx);
batchData->vbState = ApplierVersionBatchState::WRITE_TO_DB;
batchData->rateTracer = traceRate("FastRestoreApplierTransactionRateControl", batchData, req.batchIndex,
self->id(), &self->finishedBatch);
}
ASSERT(batchData->dbApplier.present());
@ -626,9 +719,12 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
wait(batchData->dbApplier.get());
// Multiple actor invokation can wait on req.batchIndex-1;
// Multiple actors can wait on req.batchIndex-1;
// Avoid setting finishedBatch when finishedBatch > req.batchIndex
if (self->finishedBatch.get() == req.batchIndex - 1) {
batchData->rateTracer =
traceRate("FastRestoreApplierTransactionRateControlDone", batchData, req.batchIndex, self->id(),
&self->finishedBatch, true /*print once*/); // Track the last rate info
self->finishedBatch.set(req.batchIndex);
// self->batch[req.batchIndex]->vbState = ApplierVersionBatchState::DONE;
// Free memory for the version batch

View File

@ -199,7 +199,7 @@ struct StagingKey {
return pendingMutations.empty() || version >= pendingMutations.rbegin()->first;
}
int expectedMutationSize() { return key.size() + val.size(); }
int totalSize() { return MutationRef::OVERHEAD_BYTES + key.size() + val.size(); }
};
// The range mutation received on applier.
@ -244,7 +244,6 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
VersionedMutationsMap kvOps; // Mutations at each version
std::map<Key, StagingKey> stagingKeys;
std::set<StagingKeyRange> stagingKeyRanges;
FlowLock applyStagingKeysBatchLock;
Future<Void> pollMetrics;
@ -253,8 +252,13 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
long receiveMutationReqs;
// Stats
long receivedBytes;
long appliedBytes;
double receivedBytes; // received mutation size
double appliedBytes; // after coalesce, how many bytes to write to DB
double targetWriteRateMB; // target amount of data outstanding for DB;
double totalBytesToWrite; // total amount of data in bytes to write
double applyingDataBytes; // amount of data in flight of committing
AsyncTrigger releaseTxnTrigger; // trigger to release more txns
Future<Void> rateTracer; // trace transaction rate control info
// Status counters
struct Counters {
@ -280,14 +284,18 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
void delref() { return ReferenceCounted<ApplierBatchData>::delref(); }
explicit ApplierBatchData(UID nodeID, int batchIndex)
: counters(this, nodeID, batchIndex), applyStagingKeysBatchLock(SERVER_KNOBS->FASTRESTORE_APPLYING_PARALLELISM),
vbState(ApplierVersionBatchState::NOT_INIT), receiveMutationReqs(0), receivedBytes(0), appliedBytes(0) {
: counters(this, nodeID, batchIndex),
targetWriteRateMB(SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS),
totalBytesToWrite(-1), applyingDataBytes(0), vbState(ApplierVersionBatchState::NOT_INIT),
receiveMutationReqs(0), receivedBytes(0), appliedBytes(0) {
pollMetrics = traceCounters(format("FastRestoreApplierMetrics%d", batchIndex), nodeID,
SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY, &counters.cc,
nodeID.toString() + "/RestoreApplierMetrics/" + std::to_string(batchIndex));
TraceEvent("FastRestoreApplierMetricsCreated").detail("Node", nodeID);
}
~ApplierBatchData() = default;
~ApplierBatchData() {
rateTracer = Void(); // cancel actor
}
void addMutation(MutationRef m, LogMessageVersion ver) {
if (!isRangeMutation(m)) {

View File

@ -105,24 +105,23 @@ ACTOR Future<Void> sampleBackups(Reference<RestoreControllerData> self, RestoreC
}
ACTOR Future<Void> startRestoreController(Reference<RestoreWorkerData> controllerWorker, Database cx) {
state ActorCollection actors(false);
ASSERT(controllerWorker.isValid());
ASSERT(controllerWorker->controllerInterf.present());
state Reference<RestoreControllerData> self =
Reference<RestoreControllerData>(new RestoreControllerData(controllerWorker->controllerInterf.get().id()));
state Future<Void> error = actorCollection(self->addActor.getFuture());
try {
// recruitRestoreRoles must come after controllerWorker has finished collectWorkerInterface
wait(recruitRestoreRoles(controllerWorker, self));
actors.add(updateHeartbeatTime(self));
actors.add(checkRolesLiveness(self));
actors.add(updateProcessMetrics(self));
actors.add(traceProcessMetrics(self, "RestoreController"));
actors.add(sampleBackups(self, controllerWorker->controllerInterf.get()));
// self->addActor.send(updateHeartbeatTime(self));
self->addActor.send(checkRolesLiveness(self));
self->addActor.send(updateProcessMetrics(self));
self->addActor.send(traceProcessMetrics(self, "RestoreController"));
self->addActor.send(sampleBackups(self, controllerWorker->controllerInterf.get()));
wait(startProcessRestoreRequests(self, cx));
wait(startProcessRestoreRequests(self, cx) || error);
} catch (Error& e) {
if (e.code() != error_code_operation_cancelled) {
TraceEvent(SevError, "FastRestoreControllerStart").detail("Reason", "Unexpected unhandled error").error(e);
@ -304,7 +303,6 @@ ACTOR static Future<Version> processRestoreRequest(Reference<RestoreControllerDa
state std::vector<RestoreFileFR> logFiles;
state std::vector<RestoreFileFR> allFiles;
state Version minRangeVersion = MAX_VERSION;
state Future<Void> error = actorCollection(self->addActor.getFuture());
self->initBackupContainer(request.url);
@ -383,7 +381,7 @@ ACTOR static Future<Version> processRestoreRequest(Reference<RestoreControllerDa
}
try {
wait(waitForAll(fBatches) || error);
wait(waitForAll(fBatches));
} catch (Error& e) {
TraceEvent(SevError, "FastRestoreControllerDispatchVersionBatchesUnexpectedError").error(e);
}
@ -748,7 +746,9 @@ ACTOR static Future<Version> collectBackupFiles(Reference<IBackupContainer> bc,
std::cout << "Restore to version: " << request.targetVersion << "\nBackupDesc: \n" << desc.toString() << "\n\n";
}
Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(request.targetVersion));
state VectorRef<KeyRangeRef> restoreRanges;
restoreRanges.add(request.range);
Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(request.targetVersion, restoreRanges));
if (!restorable.present()) {
TraceEvent(SevWarn, "FastRestoreControllerPhaseCollectBackupFiles")
@ -908,6 +908,49 @@ ACTOR static Future<Void> initializeVersionBatch(std::map<UID, RestoreApplierInt
return Void();
}
// Calculate the amount of data each applier should keep outstanding to DB;
// This is the amount of data that are in in-progress transactions.
ACTOR static Future<Void> updateApplierWriteBW(Reference<ControllerBatchData> batchData,
std::map<UID, RestoreApplierInterface> appliersInterf, int batchIndex) {
state std::unordered_map<UID, double> applierRemainMB;
state double totalRemainMB = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB;
state double standardAvgBW = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS;
state int loopCount = 0;
state std::vector<RestoreUpdateRateReply> replies;
state std::vector<std::pair<UID, RestoreUpdateRateRequest>> requests;
for (auto& applier : appliersInterf) {
applierRemainMB[applier.first] = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS;
}
loop {
requests.clear();
for (auto& applier : appliersInterf) {
double writeRate = totalRemainMB > 1 ? (applierRemainMB[applier.first] / totalRemainMB) *
SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB
: standardAvgBW;
requests.emplace_back(applier.first, RestoreUpdateRateRequest(batchIndex, writeRate));
}
replies.clear();
wait(getBatchReplies(
&RestoreApplierInterface::updateRate, appliersInterf, requests, &replies,
TaskPriority::DefaultEndpoint)); // DefaultEndpoint has higher priority than fast restore endpoints
ASSERT(replies.size() == requests.size());
totalRemainMB = 0;
for (int i = 0; i < replies.size(); i++) {
UID& applierID = requests[i].first;
applierRemainMB[applierID] = replies[i].remainMB;
totalRemainMB += replies[i].remainMB;
}
ASSERT(totalRemainMB >= 0);
double delayTime = SERVER_KNOBS->FASTRESTORE_RATE_UPDATE_SECONDS;
if (loopCount == 0) { // First loop: Need to update writeRate quicker
delayTime = 0.2;
}
loopCount++;
wait(delay(delayTime));
}
}
// Ask each applier to apply its received mutations to DB
// NOTE: Controller cannot start applying mutations at batchIndex until all appliers have applied for (batchIndex - 1)
// because appliers at different batchIndex may have overlapped key ranges.
@ -921,6 +964,8 @@ ACTOR static Future<Void> notifyApplierToApplyMutations(Reference<ControllerBatc
wait(finishedBatch->whenAtLeast(batchIndex - 1));
state Future<Void> updateRate;
if (finishedBatch->get() == batchIndex - 1) {
// Prepare the applyToDB requests
std::vector<std::pair<UID, RestoreVersionBatchRequest>> requests;
@ -940,6 +985,7 @@ ACTOR static Future<Void> notifyApplierToApplyMutations(Reference<ControllerBatc
batchData->applyToDB = Never();
batchData->applyToDB = getBatchReplies(&RestoreApplierInterface::applyToDB, appliersInterf, requests,
&replies, TaskPriority::RestoreApplierWriteDB);
updateRate = updateApplierWriteBW(batchData, appliersInterf, batchIndex);
} else {
TraceEvent(SevError, "FastRestoreControllerPhaseApplyToDB")
.detail("BatchIndex", batchIndex)
@ -1051,6 +1097,7 @@ ACTOR static Future<Void> signalRestoreCompleted(Reference<RestoreControllerData
}
// Update the most recent time when controller receives hearbeat from each loader and applier
// TODO: Replace the heartbeat mechanism with FDB failure monitoring mechanism
ACTOR static Future<Void> updateHeartbeatTime(Reference<RestoreControllerData> self) {
wait(self->recruitedRoles.getFuture());
@ -1086,10 +1133,18 @@ ACTOR static Future<Void> updateHeartbeatTime(Reference<RestoreControllerData> s
}
fTimeout = delay(SERVER_KNOBS->FASTRESTORE_HEARTBEAT_DELAY);
wait(waitForAll(fReplies) || fTimeout);
// Here we have to handle error, otherwise controller worker will fail and exit.
try {
wait(waitForAll(fReplies) || fTimeout);
} catch (Error& e) {
// This should be an ignorable error.
TraceEvent(g_network->isSimulated() ? SevWarnAlways : SevError, "FastRestoreUpdateHeartbeatError").error(e);
}
// Update the most recent heart beat time for each role
for (int i = 0; i < fReplies.size(); ++i) {
if (fReplies[i].isReady()) {
if (!fReplies[i].isError() && fReplies[i].isReady()) {
double currentTime = now();
auto item = self->rolesHeartBeatTime.emplace(nodes[i], currentTime);
item.first->second = currentTime;

View File

@ -177,7 +177,8 @@ struct RestoreControllerData : RestoreRoleData, public ReferenceCounted<RestoreC
versionBatches.clear();
batch.clear();
batchStatus.clear();
finishedBatch = NotifiedVersion();
finishedBatch = NotifiedVersion(0);
versionBatchId = NotifiedVersion(0);
ASSERT(runningVersionBatches.get() == 0);
}

View File

@ -715,7 +715,7 @@ void SimulationConfig::set_config(std::string config) {
// The only mechanism we have for turning "single" into what single means
// is buildConfiguration()... :/
std::map<std::string, std::string> hack_map;
ASSERT( buildConfiguration(config, hack_map) );
ASSERT(buildConfiguration(config, hack_map) != ConfigurationResult::NO_OPTIONS_PROVIDED);
for(auto kv : hack_map) db.set( kv.first, kv.second );
}

View File

@ -1700,25 +1700,28 @@ static int getExtraTLogEligibleZones(const vector<WorkerDetails>& workers, const
if(configuration.regions.size() == 0) {
return allZones.size() - std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize);
}
int extraTlogEligibleZones = configuration.usableRegions == 1 ? 0 : std::numeric_limits<int>::max();
int extraTlogEligibleZones = 0;
int regionsWithNonNegativePriority = 0;
for(auto& region : configuration.regions) {
int eligible = dcId_zone[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) );
//FIXME: does not take into account fallback satellite policies
if(region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
int totalSatelliteEligible = 0;
for(auto& sat : region.satellites) {
totalSatelliteEligible += dcId_zone[sat.dcId].size();
if( region.priority >= 0 ) {
int eligible = dcId_zone[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) );
//FIXME: does not take into account fallback satellite policies
if(region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
int totalSatelliteEligible = 0;
for(auto& sat : region.satellites) {
totalSatelliteEligible += dcId_zone[sat.dcId].size();
}
eligible = std::min<int>( eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor );
}
eligible = std::min<int>( eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor );
}
if( configuration.usableRegions == 1 ) {
if( region.priority >= 0 ) {
extraTlogEligibleZones = std::max( extraTlogEligibleZones, eligible );
if(eligible >= 0) {
regionsWithNonNegativePriority++;
}
} else {
extraTlogEligibleZones = std::min( extraTlogEligibleZones, eligible );
extraTlogEligibleZones = std::max( extraTlogEligibleZones, eligible );
}
}
if(regionsWithNonNegativePriority > 1) {
extraTlogEligibleZones++;
}
return extraTlogEligibleZones;
}
@ -2020,7 +2023,8 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
log_replication_factor, log_write_anti_quorum, log_fault_tolerance, remote_log_replication_factor,
remote_log_fault_tolerance;
int maxFaultTolerance = 0;
int minFaultTolerance = 1000;
int localSetsWithNonNegativeFaultTolerance = 0;
for (int i = 0; i < tLogs.size(); i++) {
int failedLogs = 0;
@ -2037,9 +2041,15 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
failedLogs++;
}
}
// The log generation's fault tolerance is the maximum tlog fault tolerance of each region.
maxFaultTolerance =
std::max(maxFaultTolerance, tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs);
if (tLogs[i].isLocal) {
int currentFaultTolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs;
if(currentFaultTolerance >= 0) {
localSetsWithNonNegativeFaultTolerance++;
}
minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance);
}
if (tLogs[i].isLocal && tLogs[i].locality == tagLocalitySatellite) {
sat_log_replication_factor = tLogs[i].tLogReplicationFactor;
sat_log_write_anti_quorum = tLogs[i].tLogWriteAntiQuorum;
@ -2053,11 +2063,18 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
remote_log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - failedLogs;
}
}
*logFaultTolerance = std::min(*logFaultTolerance, maxFaultTolerance);
if(minFaultTolerance == 1000) {
//just in case we do not have any tlog sets
minFaultTolerance = 0;
}
if(localSetsWithNonNegativeFaultTolerance > 1) {
minFaultTolerance++;
}
*logFaultTolerance = std::min(*logFaultTolerance, minFaultTolerance);
statusObj["log_interfaces"] = logsObj;
// We may lose logs in this log generation, storage servers may never be able to catch up this log
// generation.
statusObj["possibly_losing_data"] = maxFaultTolerance < 0;
statusObj["possibly_losing_data"] = minFaultTolerance < 0;
if (sat_log_replication_factor.present())
statusObj["satellite_log_replication_factor"] = sat_log_replication_factor.get();
@ -2102,12 +2119,13 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance, Reference<AsyncVar<S
static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
ServerCoordinators coordinators,
std::vector<WorkerDetails>& workers, int extraTlogEligibleZones,
int minReplicasRemaining, int oldLogFaultTolerance,
int minReplicasRemaining, int oldLogFaultTolerance,
int fullyReplicatedRegions,
bool underMaintenance) {
JsonBuilderObject statusObj;
// without losing data
int32_t maxZoneFailures = configuration.maxZoneFailuresTolerated();
int32_t maxZoneFailures = configuration.maxZoneFailuresTolerated(fullyReplicatedRegions, false);
if(underMaintenance) {
maxZoneFailures--;
}
@ -2145,8 +2163,14 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
// oldLogFaultTolerance means max failures we can tolerate to lose logs data. -1 means we lose data or availability.
zoneFailuresWithoutLosingData = std::max(std::min(zoneFailuresWithoutLosingData, oldLogFaultTolerance), -1);
statusObj["max_zone_failures_without_losing_data"] = zoneFailuresWithoutLosingData;
int32_t maxAvaiabilityZoneFailures = configuration.maxZoneFailuresTolerated(fullyReplicatedRegions, true);
if(underMaintenance) {
maxAvaiabilityZoneFailures--;
}
statusObj["max_zone_failures_without_losing_availability"] =
std::max(std::min(extraTlogEligibleZones, zoneFailuresWithoutLosingData), -1);
std::max(std::min(maxAvaiabilityZoneFailures,std::min(extraTlogEligibleZones, zoneFailuresWithoutLosingData)), -1);
return statusObj;
}
@ -2323,7 +2347,7 @@ ACTOR Future<JsonBuilderObject> lockedStatusFetcher(Reference<AsyncVar<ServerDBI
return statusObj;
}
ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray* messages) {
ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, int* fullyReplicatedRegions, JsonBuilderArray* messages) {
state ReadYourWritesTransaction tr(cx);
state Future<Void> readTimeout = delay(5); // so that we won't loop forever
@ -2334,12 +2358,17 @@ ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray*
}
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
Optional<Value> res = wait(timeoutError(tr.get(primaryDatacenterKey), 5));
if (!res.present()) {
state Future<Standalone<RangeResultRef>> fReplicaKeys = tr.getRange(datacenterReplicasKeys, CLIENT_KNOBS->TOO_MANY);
state Future<Optional<Value>> fPrimaryDatacenterKey = tr.get(primaryDatacenterKey);
wait(timeoutError(success(fPrimaryDatacenterKey) && success(fReplicaKeys), 5));
*fullyReplicatedRegions = fReplicaKeys.get().size();
if (!fPrimaryDatacenterKey.get().present()) {
messages->push_back(
JsonString::makeMessage("primary_dc_missing", "Unable to determine primary datacenter."));
}
return res;
return fPrimaryDatacenterKey.get();
} catch (Error& e) {
if (e.code() == error_code_timed_out) {
messages->push_back(
@ -2533,7 +2562,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
state Future<ErrorOr<vector<std::pair<GrvProxyInterface, EventMap>>>> grvProxyFuture = errorOr(getGrvProxiesAndMetrics(db, address_workers));
state int minReplicasRemaining = -1;
state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &messages);
state int fullyReplicatedRegions = -1;
state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &fullyReplicatedRegions, &messages);
std::vector<Future<JsonBuilderObject>> futures2;
futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
@ -2541,6 +2571,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
futures2.push_back(clusterSummaryStatisticsFetcher(pMetrics, storageServerFuture, tLogFuture, &status_incomplete_reasons));
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
wait(success(primaryDCFO));
int logFaultTolerance = 100;
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
@ -2551,13 +2582,12 @@ ACTOR Future<StatusReply> clusterGetStatus(
int extraTlogEligibleZones = getExtraTLogEligibleZones(workers, configuration.get());
statusObj["fault_tolerance"] = faultToleranceStatusFetcher(
configuration.get(), coordinators, workers, extraTlogEligibleZones, minReplicasRemaining,
logFaultTolerance, loadResult.present() && loadResult.get().healthyZone.present());
logFaultTolerance, fullyReplicatedRegions, loadResult.present() && loadResult.get().healthyZone.present());
}
state JsonBuilderObject configObj =
configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
wait(success(primaryDCFO));
if (primaryDCFO.get().present()) {
statusObj["active_primary_dc"] = primaryDCFO.get().get();
}

View File

@ -5738,6 +5738,8 @@ public:
KeyValueStoreType getType() const override { return KeyValueStoreType::SSD_REDWOOD_V1; }
bool canPipelineCommits() const override { return true; }
StorageBytes getStorageBytes() const override { return m_tree->getStorageBytes(); }
Future<Void> getError() { return delayed(m_error.getFuture()); };

View File

@ -1900,11 +1900,13 @@ int main(int argc, char* argv[]) {
g_network->run();
}
} else if (role == MultiTester) {
setupRunLoopProfiler();
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE,
opts.testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS, opts.minTesterCount,
opts.testFile, StringRef(), opts.localities));
g_network->run();
} else if (role == Test) {
setupRunLoopProfiler();
auto m = startSystemMonitor(opts.dataFolder, opts.zoneId, opts.zoneId);
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE, TEST_HERE, 1, opts.testFile, StringRef(),
opts.localities));

View File

@ -151,10 +151,11 @@ struct ShardInfo : ReferenceCounted<ShardInfo>, NonCopyable {
};
struct StorageServerDisk {
explicit StorageServerDisk( struct StorageServer* data, IKeyValueStore* storage ) : data(data), storage(storage) {}
explicit StorageServerDisk( struct StorageServer* data, IKeyValueStore* storage ) : data(data), storage(storage), _canPipelineCommits(storage->canPipelineCommits()) {}
void makeNewStorageServerDurable();
bool makeVersionMutationsDurable( Version& prevStorageVersion, Version newStorageVersion, int64_t& bytesLeft );
// Asyncronously move data from mutation log into SE's commit buffer for next commit.
Future<bool> asyncPrepareVersionsForCommit(Version desiredOldestVersion, Future<Void> durable, Future<Void>durableMinDelay);
void makeVersionDurable( Version version );
Future<bool> restoreDurableState();
@ -177,12 +178,15 @@ struct StorageServerDisk {
KeyValueStoreType getKeyValueStoreType() const { return storage->getType(); }
StorageBytes getStorageBytes() const { return storage->getStorageBytes(); }
std::tuple<size_t, size_t, size_t> getSize() const { return storage->getSize(); }
bool canPipelineCommits() const {return _canPipelineCommits;}
void set(KeyValueRef kv) { storage->set(kv);}
void clear(KeyRangeRef kr) { storage->clear(kr);}
private:
struct StorageServer* data;
IKeyValueStore* storage;
void writeMutations(const VectorRef<MutationRef>& mutations, Version debugVersion, const char* debugContext);
bool _canPipelineCommits;
ACTOR static Future<Key> readFirstKey( IKeyValueStore* storage, KeyRangeRef range ) {
Standalone<RangeResultRef> r = wait( storage->readRange( range, 1 ) );
@ -1020,6 +1024,11 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
return Void();
};
// Pessimistic estimate the number of overhead bytes used by each
// watch. Watch key references are stored in an AsyncMap<Key,bool>, and actors
// must be kept alive until the watch is finished.
static constexpr size_t WATCH_OVERHEAD_BYTES = 1000;
ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req, SpanID parent ) {
state Location spanLocation = "SS:WatchValueImpl"_loc;
state Span span(spanLocation, { parent });
@ -1070,7 +1079,7 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req,
}
++data->numWatches;
data->watchBytes += ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
data->watchBytes += (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
try {
if(latest < minVersion) {
// If the version we read is less than minVersion, then we may fail to be notified of any changes that occur up to or including minVersion
@ -1083,10 +1092,10 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req,
}
wait(watchFuture);
--data->numWatches;
data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
data->watchBytes -= (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
} catch( Error &e ) {
--data->numWatches;
data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
data->watchBytes -= (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
throw;
}
} catch( Error &e ) {
@ -3071,75 +3080,67 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
wait( delay(0, TaskPriority::UpdateStorage) );
state Promise<Void> durableInProgress;
data->durableInProgress = durableInProgress.getFuture();
state Version startOldestVersion = data->storageVersion();
state Version newOldestVersion = data->storageVersion();
state Version desiredVersion = data->desiredOldestVersion.get();
state int64_t bytesLeft = SERVER_KNOBS->STORAGE_COMMIT_BYTES;
state Version desiredOldestVersion = data->desiredOldestVersion.get();
// Write mutations to storage until we reach the desiredVersion or have written too much (bytesleft)
state Future<Void> durableMinDelay = Void();
state Future<Void> durable = Void();
state int64_t ssCommitQuotaBytes;
state Version pendingCommitVersion;
state int64_t bytesWritten = 0;
state bool finalCommit = false;
state bool done = false;
loop {
state bool done = data->storage.makeVersionMutationsDurable(newOldestVersion, desiredVersion, bytesLeft);
// We want to forget things from these data structures atomically with changing oldestVersion (and "before", since oldestVersion.set() may trigger waiting actors)
// forgetVersionsBeforeAsync visibly forgets immediately (without waiting) but asynchronously frees memory.
Future<Void> finishedForgetting = data->mutableData().forgetVersionsBeforeAsync( newOldestVersion, TaskPriority::UpdateStorage );
data->oldestVersion.set( newOldestVersion );
wait( finishedForgetting );
wait( yield(TaskPriority::UpdateStorage) );
// Keep making data from mutation log durable, until no data left whose version is <= desiredOldestVersion
pendingCommitVersion = data->storageVersion();
ssCommitQuotaBytes = SERVER_KNOBS->STORAGE_COMMIT_BYTES;
durableInProgress.reset();
data->durableInProgress = durableInProgress.getFuture();
durable = data->storage.commit(); // Commit data up to(inclusive) version pendingCommitVersion
durableMinDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
if (finalCommit) {
wait(durable && durableMinDelay);
done = true;
} else {
// Move data start from pendingCommitVersion+1 to SE's commit buffer.
bool _finalCommit = wait(data->storage.asyncPrepareVersionsForCommit(desiredOldestVersion, durable, durableMinDelay));
finalCommit = _finalCommit;
}
debug_advanceMinCommittedVersion( data->thisServerID, pendingCommitVersion );
if(pendingCommitVersion > data->rebootAfterDurableVersion) {
TraceEvent("RebootWhenDurableTriggered", data->thisServerID).detail("PendingCommitVersion", pendingCommitVersion).detail("RebootAfterDurableVersion", data->rebootAfterDurableVersion);
// To avoid brokenPromise error, which is caused by the sender of the durableInProgress (i.e., this process)
// never sets durableInProgress, we should set durableInProgress before send the please_reboot() error.
// Otherwise, in the race situation when storage server receives both reboot and
// brokenPromise of durableInProgress, the worker of the storage server will die.
// We will eventually end up with no worker for storage server role.
// The data distributor's buildTeam() will get stuck in building a team
durableInProgress.sendError(please_reboot());
throw please_reboot();
}
durableInProgress.send(Void());
wait( delay(0, TaskPriority::UpdateStorage) ); //Setting durableInProgess could cause the storage server to shut down, so delay to check for cancellation
// Taking and releasing the durableVersionLock ensures that no eager reads both begin before the commit was effective and
// are applied after we change the durable version. Also ensure that we have to lock while calling changeDurableVersion,
// because otherwise the latest version of mutableData might be partially loaded.
wait( data->durableVersionLock.take() );
data->popVersion( data->durableVersion.get() + 1 );
// Update durableVersion to pendingCommitVersion, which has been committed.
while (!changeDurableVersion( data, pendingCommitVersion )) {
if(g_network->check_yield(TaskPriority::UpdateStorage)) {
data->durableVersionLock.release();
wait(delay(0, TaskPriority::UpdateStorage));
wait( data->durableVersionLock.take() );
}
}
data->durableVersionLock.release();
if (done) break;
}
// Set the new durable version as part of the outstanding change set, before commit
if (startOldestVersion != newOldestVersion)
data->storage.makeVersionDurable( newOldestVersion );
debug_advanceMaxCommittedVersion( data->thisServerID, newOldestVersion );
state Future<Void> durable = data->storage.commit();
state Future<Void> durableDelay = Void();
if (bytesLeft > 0) {
durableDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
}
wait( durable );
debug_advanceMinCommittedVersion( data->thisServerID, newOldestVersion );
if(newOldestVersion > data->rebootAfterDurableVersion) {
TraceEvent("RebootWhenDurableTriggered", data->thisServerID).detail("NewOldestVersion", newOldestVersion).detail("RebootAfterDurableVersion", data->rebootAfterDurableVersion);
// To avoid brokenPromise error, which is caused by the sender of the durableInProgress (i.e., this process)
// never sets durableInProgress, we should set durableInProgress before send the please_reboot() error.
// Otherwise, in the race situation when storage server receives both reboot and
// brokenPromise of durableInProgress, the worker of the storage server will die.
// We will eventually end up with no worker for storage server role.
// The data distributor's buildTeam() will get stuck in building a team
durableInProgress.sendError(please_reboot());
throw please_reboot();
}
durableInProgress.send(Void());
wait( delay(0, TaskPriority::UpdateStorage) ); //Setting durableInProgess could cause the storage server to shut down, so delay to check for cancellation
// Taking and releasing the durableVersionLock ensures that no eager reads both begin before the commit was effective and
// are applied after we change the durable version. Also ensure that we have to lock while calling changeDurableVersion,
// because otherwise the latest version of mutableData might be partially loaded.
wait( data->durableVersionLock.take() );
data->popVersion( data->durableVersion.get() + 1 );
while (!changeDurableVersion( data, newOldestVersion )) {
if(g_network->check_yield(TaskPriority::UpdateStorage)) {
data->durableVersionLock.release();
wait(delay(0, TaskPriority::UpdateStorage));
wait( data->durableVersionLock.take() );
}
}
data->durableVersionLock.release();
//TraceEvent("StorageServerDurable", data->thisServerID).detail("Version", newOldestVersion);
wait( durableDelay );
}
}
@ -3212,36 +3213,97 @@ void StorageServerDisk::writeMutation( MutationRef mutation ) {
ASSERT(false);
}
void StorageServerDisk::writeMutations(const VectorRef<MutationRef>& mutations, Version debugVersion,
const char* debugContext) {
for (const auto& m : mutations) {
ACTOR Future<int64_t> asyncWriteMutationsToCommitBuffer(StorageServer* data, VectorRef<MutationRef> mutations, Version debugVersion, const char* debugContext, int64_t ssCommitQuotaBytes) {
state int bytesWritten = 0;
state int i = 0;
for (;i < mutations.size(); i++) {
const auto& m = mutations[i];
DEBUG_MUTATION(debugContext, debugVersion, m).detail("UID", data->thisServerID);
if (m.type == MutationRef::SetValue) {
storage->set(KeyValueRef(m.param1, m.param2));
data->storage.set(KeyValueRef(m.param1, m.param2));
} else if (m.type == MutationRef::ClearRange) {
storage->clear(KeyRangeRef(m.param1, m.param2));
data->storage.clear(KeyRangeRef(m.param1, m.param2));
}
auto mutationBytes = mvccStorageBytes(m);
bytesWritten += mutationBytes;
ssCommitQuotaBytes -= mutationBytes;
if (data->storage.canPipelineCommits() && bytesWritten >= SERVER_KNOBS->STORAGE_COMMIT_PIPELINE_BYTES_PER_YIELD) {
bytesWritten = 0;
wait(yield());
}
}
return ssCommitQuotaBytes;
}
bool StorageServerDisk::makeVersionMutationsDurable( Version& prevStorageVersion, Version newStorageVersion, int64_t& bytesLeft ) {
if (bytesLeft <= 0) return true;
ACTOR Future<bool> asyncPrepareVersionsForCommit_impl(StorageServerDisk* self, StorageServer* data, Version desiredOldestVersion, Future<Void> durable, Future<Void>durableMinDelay) {
state int64_t ssCommitQuotaBytes = SERVER_KNOBS->STORAGE_COMMIT_BYTES;
state bool finalCommit = false;
state Version startOldestVersion = data->storageVersion();
state Version newOldestVersion = data->storageVersion();
state SignalableActorCollection forgetter;
loop {
// While committing previously written data, keep writting new data from later versions until
// 1.) commit is done, or
// 2.) ssCommitQuotaBytes <= 0, or
// 3.) no data in mutation log to write.
if (!data->storage.canPipelineCommits()) {
// Don't write version data while a commit is going on if the storage engine does not support pipelining
wait(durable && durableMinDelay);
}
state Future<Void> stopEarly = data->storage.canPipelineCommits() ? (durable && durableMinDelay) : Never();
// Apply mutations from the mutationLog
auto u = data->getMutationLog().upper_bound(newOldestVersion);
if (u != data->getMutationLog().end() && u->first <= desiredOldestVersion) {
VerUpdateRef const& v = u->second;
newOldestVersion = v.version;
ASSERT( newOldestVersion > data->storageVersion() && newOldestVersion <= desiredOldestVersion );
// TODO(alexmiller): Update to version tracking.
DEBUG_KEY_RANGE("asyncPrepareVersionsForCommit", newOldestVersion, KeyRangeRef());
int64_t _ssCommitQuotaBytes = wait(asyncWriteMutationsToCommitBuffer(data, v.mutations, newOldestVersion, "asyncPrepareVersionsForCommit", ssCommitQuotaBytes));
ssCommitQuotaBytes = _ssCommitQuotaBytes;
// Apply mutations from the mutationLog
auto u = data->getMutationLog().upper_bound(prevStorageVersion);
if (u != data->getMutationLog().end() && u->first <= newStorageVersion) {
VerUpdateRef const& v = u->second;
ASSERT( v.version > prevStorageVersion && v.version <= newStorageVersion );
// TODO(alexmiller): Update to version tracking.
DEBUG_KEY_RANGE("makeVersionMutationsDurable", v.version, KeyRangeRef());
writeMutations(v.mutations, v.version, "makeVersionDurable");
for (const auto& m : v.mutations) bytesLeft -= mvccStorageBytes(m);
prevStorageVersion = v.version;
return false;
} else {
prevStorageVersion = newStorageVersion;
return true;
// We want to forget things from these data structures atomically with changing oldestVersion (and "before", since oldestVersion.set() may trigger waiting actors)
// forgetVersionsBeforeAsync visibly forgets immediately (without waiting) but asynchronously frees memory.
forgetter.add(data->mutableData().forgetVersionsBeforeAsync( newOldestVersion, TaskPriority::UpdateStorage ));
data->oldestVersion.set( newOldestVersion );
if (ssCommitQuotaBytes <= 0) {
// No quota left. Wait for previous commit to finish.
wait(durable && durableMinDelay);
break;
}
if (stopEarly.isReady()) {
// Previous commit is done.
break;
}
} else {
// Since there is no data in mutation log, in order to make progress,
// advance it to desiredOldestVersion directly
newOldestVersion = desiredOldestVersion;
// We want to forget things from these data structures atomically with changing oldestVersion (and "before", since oldestVersion.set() may trigger waiting actors)
// forgetVersionsBeforeAsync visibly forgets immediately (without waiting) but asynchronously frees memory.
forgetter.add(data->mutableData().forgetVersionsBeforeAsync( newOldestVersion, TaskPriority::UpdateStorage ));
data->oldestVersion.set( newOldestVersion );
// No more data in mutation log can be written.
finalCommit = true;
// Wait the previously written data to be committed
wait(durable && durableMinDelay);
break;
}
}
if (newOldestVersion > startOldestVersion){
// Set the new durable version as part of the outstanding change set, before commit
data->storage.makeVersionDurable( newOldestVersion );
}
debug_advanceMaxCommittedVersion( data->thisServerID, newOldestVersion );
wait(forgetter.signal());
return finalCommit;
}
Future<bool> StorageServerDisk::asyncPrepareVersionsForCommit(Version desiredOldestVersion, Future<Void> durable, Future<Void>durableMinDelay) {
return asyncPrepareVersionsForCommit_impl(this, data, desiredOldestVersion, durable, durableMinDelay);
}
// Update data->storage to persist the changes from (data->storageVersion(),version]
@ -4175,4 +4237,3 @@ void versionedMapTest() {
printf("Memory used: %f MB\n",
(after - before)/ 1e6);
}

View File

@ -101,7 +101,7 @@ struct AtomicOpsWorkload : TestWorkload {
// case 10:
// TEST(true); // Testing atomic CompareAndClear Not supported yet
// opType = MutationRef::CompareAndClear
// break;
// break;
default:
ASSERT(false);
}

View File

@ -476,6 +476,7 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
.detail("LastBackupContainer", lastBackupContainer->getURL())
.detail("RestoreAfter", self->restoreAfter)
.detail("BackupTag", printable(self->backupTag));
// start restoring
auto container = IBackupContainer::openContainer(lastBackupContainer->getURL());
BackupDescription desc = wait(container->describeBackup());

View File

@ -243,7 +243,7 @@ struct ConfigureDatabaseWorkload : TestWorkload {
return StringRef(format("DestroyDB%d", dbIndex));
}
static Future<ConfigurationResult::Type> IssueConfigurationChange( Database cx, const std::string& config, bool force ) {
static Future<ConfigurationResult> IssueConfigurationChange(Database cx, const std::string& config, bool force) {
printf("Issuing configuration change: %s\n", config.c_str());
return changeConfig(cx, config, force);
}

View File

@ -549,7 +549,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
while (true) {
cycle ++;
nQuorum = ((g_simulator.desiredCoordinators+1)/2)*2-1;
CoordinatorsResult::Type result = wait( changeQuorum( cx, autoQuorumChange(nQuorum) ) );
CoordinatorsResult result = wait(changeQuorum(cx, autoQuorumChange(nQuorum)));
TraceEvent(result==CoordinatorsResult::SUCCESS || result==CoordinatorsResult::SAME_NETWORK_ADDRESSES ? SevInfo : SevWarn, "RemoveAndKillQuorumChangeResult").detail("Step", "coordinators auto").detail("Result", (int)result).detail("Attempt", cycle).detail("Quorum", nQuorum).detail("DesiredCoordinators", g_simulator.desiredCoordinators);
if (result==CoordinatorsResult::SUCCESS || result==CoordinatorsResult::SAME_NETWORK_ADDRESSES)
break;

View File

@ -104,6 +104,10 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
Future<Void> f;
{
ReadYourWritesTransaction ryw{ cx->clone() };
if(!ryw.getDatabase()->apiVersionAtLeast(630)) {
//This test is not valid for API versions smaller than 630
return;
}
f = success(ryw.get(LiteralStringRef("\xff\xff/status/json")));
TEST(!f.isReady());
}

View File

@ -71,7 +71,7 @@ struct TriggerRecoveryLoopWorkload : TestWorkload {
state StringRef configStr(format("resolvers=%d", numResolversToSet));
loop {
Optional<ConfigureAutoResult> conf;
ConfigurationResult::Type r = wait(changeConfig(cx, { configStr }, conf, true));
ConfigurationResult r = wait(changeConfig(cx, { configStr }, conf, true));
if (r == ConfigurationResult::SUCCESS) {
self->currentNumOfResolvers = numResolversToSet;
TraceEvent(SevInfo, "TriggerRecoveryLoop_ChangeResolverConfigSuccess").detail("NumOfResolvers", self->currentNumOfResolvers.get());

View File

@ -85,11 +85,12 @@ Error systemErrorCodeToError();
inline Error actor_cancelled() { return Error( error_code_operation_cancelled ); }
enum { error_code_actor_cancelled = error_code_operation_cancelled };
extern Error internal_error_impl( const char* file, int line );
extern Error internal_error_impl(const char* file, int line);
extern Error internal_error_impl(const char* msg, const char* file, int line);
extern Error internal_error_impl(const char * a_nm, long long a, const char * op_nm, const char * b_nm, long long b, const char * file, int line);
#define inernal_error_msg(msg) internal_error_impl(msg, __FILE__, __LINE__)
#define internal_error() internal_error_impl(__FILE__, __LINE__)
#define internal_error_msg(msg) internal_error_impl(msg, __FILE__, __LINE__)
extern bool isAssertDisabled( int line );
//#define ASSERT( condition ) ((void)0)

View File

@ -204,7 +204,16 @@ public:
}
}
ThreadSingleAssignmentVarBase() : status(Unset), callback(nullptr), valueReferenceCount(0) {} //, referenceCount(1) {}
void blockUntilReadyCheckOnMainThread() {
if (!isReady()) {
if (g_network->isOnMainThread()) {
throw blocked_from_network_thread();
}
BlockCallback cb(*this);
}
}
ThreadSingleAssignmentVarBase() : status(Unset), callback(NULL), valueReferenceCount(0) {} //, referenceCount(1) {}
~ThreadSingleAssignmentVarBase() {
this->mutex.assertNotEntered();
@ -310,12 +319,12 @@ public:
}
virtual void cancel() {
// Cancels the action and decrements the reference count by 1
// The if statement is just an optimization. It's ok if we take the wrong path due to a race
if(isReadyUnsafe())
delref();
else
onMainThreadVoid( [this](){ this->cancelFuture.cancel(); this->delref(); }, nullptr );
onMainThreadVoid(
[this]() {
this->cancelFuture.cancel();
this->delref();
},
nullptr);
}
void releaseMemory() {
@ -329,6 +338,7 @@ private:
int32_t valueReferenceCount;
protected:
// The caller of any of these *Unsafe functions should be holding |mutex|
bool isReadyUnsafe() const { return status >= Set; }
bool isErrorUnsafe() const { return status == ErrorSet; }
bool canBeSetUnsafe() const { return status == Unset; }
@ -426,6 +436,8 @@ public:
sav->blockUntilReady();
}
void blockUntilReadyCheckOnMainThread() { sav->blockUntilReadyCheckOnMainThread(); }
bool isValid() const {
return sav != 0;
}

View File

@ -145,6 +145,7 @@ ERROR( environment_variable_network_option_failed, 2022, "Environment variable n
ERROR( transaction_read_only, 2023, "Attempted to commit a transaction specified as read-only" )
ERROR( invalid_cache_eviction_policy, 2024, "Invalid cache eviction policy, only random and lru are supported" )
ERROR( network_cannot_be_restarted, 2025, "Network can only be started once" )
ERROR( blocked_from_network_thread, 2026, "Detected a deadlock in a callback called from the network thread" )
ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" )
ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" )
@ -204,6 +205,8 @@ ERROR( backup_cannot_expire, 2316, "Cannot expire requested data from backup wit
ERROR( backup_auth_missing, 2317, "Cannot find authentication details (such as a password or secret key) for the specified Backup Container URL")
ERROR( backup_auth_unreadable, 2318, "Cannot read or parse one or more sources of authentication information for Backup Container URLs")
ERROR( backup_does_not_exist, 2319, "Backup does not exist")
ERROR( backup_not_filterable_with_key_ranges, 2320, "Backup before 6.3 cannot be filtered with key ranges")
ERROR( backup_not_overlapped_with_keys_filter, 2321, "Backup key ranges doesn't overlap with key ranges filter")
ERROR( restore_invalid_version, 2361, "Invalid restore version")
ERROR( restore_corrupted_data, 2362, "Corrupted backup data")
ERROR( restore_missing_data, 2363, "Missing backup data")

View File

@ -32,7 +32,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
<Product Name='$(var.Title)'
Id='{707FC06F-9954-4A7E-AC9C-A52C99AE776D}'
Id='{0AB36B0F-2187-4ECD-9E7E-983EDD966CEB}'
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
Version='$(var.Version)'
Manufacturer='$(var.Manufacturer)'

View File

@ -0,0 +1,121 @@
from pathlib import Path
from argparse import ArgumentParser
import random
import string
import subprocess
import sys
import socket
def get_free_port():
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(('0.0.0.0', 0))
return s.getsockname()[1]
class LocalCluster:
configuration_template = """
## foundationdb.conf
##
## Configuration file for FoundationDB server processes
## Full documentation is available at
## https://apple.github.io/foundationdb/configuration.html#the-configuration-file
[fdbmonitor]
[general]
restart_delay = 10
## by default, restart_backoff = restart_delay_reset_interval = restart_delay
# initial_restart_delay = 0
# restart_backoff = 60
# restart_delay_reset_interval = 60
cluster_file = {etcdir}/fdb.cluster
# delete_envvars =
# kill_on_configuration_change = true
## Default parameters for individual fdbserver processes
[fdbserver]
command = {fdbserver_bin}
public_address = auto:$ID
listen_address = public
datadir = {datadir}
logdir = {logdir}
# logsize = 10MiB
# maxlogssize = 100MiB
# machine_id =
# datacenter_id =
# class =
# memory = 8GiB
# storage_memory = 1GiB
# cache_memory = 2GiB
# metrics_cluster =
# metrics_prefix =
## An individual fdbserver process with id 4000
## Parameters set here override defaults from the [fdbserver] section
[fdbserver.{server_port}]
"""
valid_letters_for_secret = string.ascii_letters + string.digits
def __init__(self, basedir: str, fdbserver_binary: str, fdbmonitor_binary: str,
fdbcli_binary: str, create_config=True, port=None, ip_address=None):
self.basedir = Path(basedir)
self.fdbserver_binary = Path(fdbserver_binary)
self.fdbmonitor_binary = Path(fdbmonitor_binary)
self.fdbcli_binary = Path(fdbcli_binary)
for b in (self.fdbserver_binary, self.fdbmonitor_binary, self.fdbcli_binary):
assert b.exists(), "{} does not exist".format(b)
if not self.basedir.exists():
self.basedir.mkdir()
self.etc = self.basedir.joinpath('etc')
self.log = self.basedir.joinpath('log')
self.data = self.basedir.joinpath('data')
self.etc.mkdir(exist_ok=True)
self.log.mkdir(exist_ok=True)
self.data.mkdir(exist_ok=True)
self.port = get_free_port() if port is None else port
self.ip_address = '127.0.0.1' if ip_address is None else ip_address
self.running = False
self.process = None
self.fdbmonitor_logfile = None
if create_config:
with open(self.etc.joinpath('fdb.cluster'), 'x') as f:
random_string = lambda len : ''.join(random.choice(LocalCluster.valid_letters_for_secret) for i in range(len))
f.write('{desc}:{secret}@{ip_addr}:{server_port}'.format(
desc=random_string(8),
secret=random_string(8),
ip_addr=self.ip_address,
server_port=self.port
))
with open(self.etc.joinpath('foundationdb.conf'), 'x') as f:
f.write(LocalCluster.configuration_template.format(
etcdir=self.etc,
fdbserver_bin=self.fdbserver_binary,
datadir=self.data,
logdir=self.log,
server_port=self.port
))
def __enter__(self):
assert not self.running, "Can't start a server that is already running"
args = [str(self.fdbmonitor_binary),
'--conffile',
str(self.etc.joinpath('foundationdb.conf')),
'--lockfile',
str(self.etc.joinpath('fdbmonitor.lock'))]
self.fdbmonitor_logfile = open(self.log.joinpath('fdbmonitor.log'), 'w')
self.process = subprocess.Popen(args, stdout=self.fdbmonitor_logfile, stderr=self.fdbmonitor_logfile)
self.running = True
return self
def __exit__(self, xc_type, exc_value, traceback):
assert self.running, "Server is not running"
if self.process.poll() is None:
self.process.terminate()
self.running = False
def create_database(self, storage='ssd'):
args = [self.fdbcli_binary, '-C', self.etc.joinpath('fdb.cluster'), '--exec',
'configure new single {}'.format(storage)]
subprocess.run(args)

78
tests/TestRunner/tmp_cluster.py Executable file
View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
import shutil
import subprocess
import sys
import socket
from local_cluster import LocalCluster
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from random import choice
from pathlib import Path
class TempCluster:
def __init__(self, build_dir: str):
self.build_dir = Path(build_dir).resolve()
assert self.build_dir.exists(), "{} does not exist".format(build_dir)
assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
tmp_dir = self.build_dir.joinpath(
'tmp',
''.join(choice(LocalCluster.valid_letters_for_secret) for i in range(16)))
tmp_dir.mkdir(parents=True)
self.cluster = LocalCluster(tmp_dir,
self.build_dir.joinpath('bin', 'fdbserver'),
self.build_dir.joinpath('bin', 'fdbmonitor'),
self.build_dir.joinpath('bin', 'fdbcli'))
self.log = self.cluster.log
self.etc = self.cluster.etc
self.data = self.cluster.data
self.tmp_dir = tmp_dir
def __enter__(self):
self.cluster.__enter__()
self.cluster.create_database()
return self
def __exit__(self, xc_type, exc_value, traceback):
self.cluster.__exit__(xc_type, exc_value, traceback)
shutil.rmtree(self.tmp_dir)
if __name__ == '__main__':
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description="""
This script automatically configures a temporary local cluster on the machine
and then calls a command while this cluster is running. As soon as the command
returns, the configured cluster is killed and all generated data is deleted.
This is useful for testing: if a test needs access to a fresh fdb cluster, one
can simply pass the test command to this script.
The command to run after the cluster started. Before the command is executed,
the following arguments will be preprocessed:
- All occurrences of @CLUSTER_FILE@ will be replaced with the path to the generated cluster file.
- All occurrences of @DATA_DIR@ will be replaced with the path to the data directory.
- All occurrences of @LOG_DIR@ will be replaced with the path to the log directory.
- All occurrences of @ETC_DIR@ will be replaced with the path to the configuration directory.
""")
parser.add_argument('--build-dir', '-b', metavar='BUILD_DIRECTORY', help='FDB build directory', required=True)
parser.add_argument('cmd', metavar="COMMAND", nargs="+", help="The command to run")
args = parser.parse_args()
errcode = 1
with TempCluster(args.build_dir) as cluster:
print("log-dir: {}".format(cluster.log))
print("etc-dir: {}".format(cluster.etc))
print("data-dir: {}".format(cluster.data))
print("cluster-file: {}".format(cluster.etc.joinpath('fdb.cluster')))
cmd_args = []
for cmd in args.cmd:
if cmd == '@CLUSTER_FILE@':
cmd_args.append(str(cluster.etc.joinpath('fdb.cluster')))
elif cmd == '@DATA_DIR@':
cmd_args.append(str(cluster.data))
elif cmd == '@LOG_DIR@':
cmd_args.append(str(cluster.log))
elif cmd == '@ETC_DIR@':
cmd_args.append(str(cluster.etc))
else:
cmd_args.append(cmd)
errcode = subprocess.run(cmd_args, stdout=sys.stdout, stderr=sys.stderr).returncode
sys.exit(errcode)