Merge branch 'master' of https://github.com/apple/foundationdb into jfu-snapshot-record-version

This commit is contained in:
Jon Fu 2020-10-05 13:15:56 -04:00
commit c622a11c70
24 changed files with 445 additions and 376 deletions

View File

@ -153,7 +153,7 @@ void fdb_future_destroy( FDBFuture* f ) {
extern "C" DLLEXPORT
fdb_error_t fdb_future_block_until_ready( FDBFuture* f ) {
CATCH_AND_RETURN( TSAVB(f)->blockUntilReady(); );
CATCH_AND_RETURN(TSAVB(f)->blockUntilReadyCheckOnMainThread(););
}
fdb_bool_t fdb_future_is_error_v22( FDBFuture* f ) {

View File

@ -45,13 +45,13 @@ RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -
cd .. && rm -rf ninja-1.9.0 ninja.zip
# install openssl
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\
echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2 openssl.tar.gz" > openssl-sha.txt &&\
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1h.tar.gz -o openssl.tar.gz &&\
echo "5c9ca8774bd7b03e5784f26ae9e9e6d749c9da2438545077e6b3d755a06595d9 openssl.tar.gz" > openssl-sha.txt &&\
sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\
cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
cd openssl-1.1.1h && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\
ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\
cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz
cd /tmp/ && rm -rf /tmp/openssl-1.1.1h /tmp/openssl.tar.gz
RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocksdb.tar.gz &&\
echo "d573d2f15cdda883714f7e0bc87b814a8d4a53a82edde558f08f940e905541ee rocksdb.tar.gz" > rocksdb-sha.txt &&\
@ -61,8 +61,8 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
ARG TIMEZONEINFO=America/Los_Angeles
RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
LABEL version=0.1.15
ENV DOCKER_IMAGEVER=0.1.15
LABEL version=0.1.17
ENV DOCKER_IMAGEVER=0.1.17
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++

View File

@ -1,4 +1,4 @@
FROM foundationdb/foundationdb-build:0.1.15
FROM foundationdb/foundationdb-build:0.1.17
USER root

View File

@ -2,7 +2,7 @@ version: "3"
services:
common: &common
image: foundationdb/foundationdb-build:0.1.15
image: foundationdb/foundationdb-build:0.1.17
build-setup: &build-setup
<<: *common

View File

@ -7,7 +7,7 @@ SCRIPTID="${$}"
SAVEONERROR="${SAVEONERROR:-1}"
PYTHONDIR="${BINDIR}/tests/python"
testScript="${BINDIR}/tests/bindingtester/run_binding_tester.sh"
VERSION="1.8"
VERSION="1.9"
source ${SCRIPTDIR}/localClusterStart.sh
@ -28,7 +28,7 @@ then
echo "Log dir: ${LOGDIR}"
echo "Python path: ${PYTHONDIR}"
echo "Lib dir: ${LIBDIR}"
echo "Cluster String: ${CLUSTERSTRING}"
echo "Cluster String: ${FDBCLUSTERTEXT}"
echo "Script Id: ${SCRIPTID}"
echo "Version: ${VERSION}"
fi

View File

@ -6,6 +6,7 @@ LOGDIR="${WORKDIR}/log"
ETCDIR="${WORKDIR}/etc"
BINDIR="${BINDIR:-${SCRIPTDIR}}"
FDBPORTSTART="${FDBPORTSTART:-4000}"
FDBPORTTOTAL="${FDBPORTTOTAL:-1000}"
SERVERCHECKS="${SERVERCHECKS:-10}"
CONFIGUREWAIT="${CONFIGUREWAIT:-240}"
FDBCONF="${ETCDIR}/fdb.cluster"
@ -18,382 +19,384 @@ status=0
messagetime=0
messagecount=0
# Define a random ip address and port on localhost
if [ -z ${IPADDRESS} ]; then
let index2="${RANDOM} % 256"
let index3="${RANDOM} % 256"
let index4="(${RANDOM} % 255) + 1"
IPADDRESS="127.${index2}.${index3}.${index4}"
# Do nothing if the cluster string is already defined
if [ -n "${FDBCLUSTERTEXT}" ]
then
:
# Otherwise, define the cluster text
else
# Define a random IP address and port on localhost
if [ -z "${IPADDRESS}" ]; then
let index2="${RANDOM} % 256"
let index3="${RANDOM} % 256"
let index4="(${RANDOM} % 255) + 1"
IPADDRESS="127.${index2}.${index3}.${index4}"
fi
if [ -z "${FDBPORT}" ]; then
let FDBPORT="(${RANDOM} % ${FDBPORTTOTAL}) + ${FDBPORTSTART}"
fi
FDBCLUSTERTEXT="${IPADDRESS}:${FDBPORT}"
fi
if [ -z ${FDBPORT} ]; then
let FDBPORT="(${RANDOM} % 1000) + ${FDBPORTSTART}"
fi
CLUSTERSTRING="${IPADDRESS}:${FDBPORT}"
function log
{
local status=0
if [ "$#" -lt 1 ]
then
echo "Usage: log <message> [echo]"
echo
echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
echo "second argument is either not present or is set to 1, stdout."
let status="${status} + 1"
else
# Log to stdout.
if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
then
echo "${1}"
fi
local status=0
if [ "$#" -lt 1 ]
then
echo "Usage: log <message> [echo]"
echo
echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
echo "second argument is either not present or is set to 1, stdout."
let status="${status} + 1"
else
# Log to stdout.
if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
then
echo "${1}"
fi
# Log to file.
datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
dir=$(dirname "${LOGFILE}")
if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
then
echo "Could not create directory to log output."
let status="${status} + 1"
elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
then
echo "Could not create file ${LOGFILE} to log output."
let status="${status} + 1"
elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
then
echo "Could not log output to ${LOGFILE}."
let status="${status} + 1"
fi
fi
# Log to file.
datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
dir=$(dirname "${LOGFILE}")
if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
then
echo "Could not create directory to log output."
let status="${status} + 1"
elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
then
echo "Could not create file ${LOGFILE} to log output."
let status="${status} + 1"
elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
then
echo "Could not log output to ${LOGFILE}."
let status="${status} + 1"
fi
fi
return "${status}"
return "${status}"
}
# Display a message for the user.
function displayMessage
{
local status=0
local status=0
if [ "$#" -lt 1 ]
then
echo "displayMessage <message>"
let status="${status} + 1"
elif ! log "${1}" 0
then
log "Could not write message to file."
else
# Increment the message counter
let messagecount="${messagecount} + 1"
if [ "$#" -lt 1 ]
then
echo "displayMessage <message>"
let status="${status} + 1"
elif ! log "${1}" 0
then
log "Could not write message to file."
else
# Increment the message counter
let messagecount="${messagecount} + 1"
# Display successful message, if previous message
if [ "${messagecount}" -gt 1 ]
then
# Determine the amount of transpired time
let timespent="${SECONDS}-${messagetime}"
# Display successful message, if previous message
if [ "${messagecount}" -gt 1 ]
then
# Determine the amount of transpired time
let timespent="${SECONDS}-${messagetime}"
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "... done in %3d seconds\n" "${timespent}"
fi
fi
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "... done in %3d seconds\n" "${timespent}"
fi
fi
# Display message
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1"
fi
# Display message
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1"
fi
# Update the variables
messagetime="${SECONDS}"
fi
# Update the variables
messagetime="${SECONDS}"
fi
return "${status}"
return "${status}"
}
# Create the directories used by the server.
function createDirectories
{
local status=0
local status=0
# Display user message
if ! displayMessage "Creating directories"
then
echo 'Failed to display user message'
let status="${status} + 1"
# Display user message
if ! displayMessage "Creating directories"
then
echo 'Failed to display user message'
let status="${status} + 1"
elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
then
log "Failed to create directories"
let status="${status} + 1"
elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
then
log "Failed to create directories"
let status="${status} + 1"
# Display user message
elif ! displayMessage "Setting file permissions"
then
log 'Failed to display user message'
let status="${status} + 1"
# Display user message
elif ! displayMessage "Setting file permissions"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
then
log "Failed to set file permissions"
let status="${status} + 1"
elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
then
log "Failed to set file permissions"
let status="${status} + 1"
else
while read filepath
do
if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
then
# if [ "${DEBUGLEVEL}" -gt 1 ]; then
# log " Enable executable: ${filepath}"
# fi
log " Enable executable: ${filepath}" "${DEBUGLEVEL}"
if ! chmod 755 "${filepath}"
then
log "Failed to set executable for file: ${filepath}"
let status="${status} + 1"
fi
fi
done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
fi
else
while read filepath
do
if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
then
# if [ "${DEBUGLEVEL}" -gt 1 ]; then
# log " Enable executable: ${filepath}"
# fi
log " Enable executable: ${filepath}" "${DEBUGLEVEL}"
if ! chmod 755 "${filepath}"
then
log "Failed to set executable for file: ${filepath}"
let status="${status} + 1"
fi
fi
done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
fi
return ${status}
return ${status}
}
# Create a cluster file for the local cluster.
function createClusterFile
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
# Display user message
elif ! displayMessage "Creating Fdb Cluster file"
then
log 'Failed to display user message'
let status="${status} + 1"
else
description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
echo "${description}:${random_str}@${CLUSTERSTRING}" > "${FDBCONF}"
fi
if [ "${status}" -ne 0 ]; then
:
# Display user message
elif ! displayMessage "Creating Fdb Cluster file"
then
log 'Failed to display user message'
let status="${status} + 1"
else
description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
echo "${description}:${random_str}@${FDBCLUSTERTEXT}" > "${FDBCONF}"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! chmod 0664 "${FDBCONF}"; then
log "Failed to set permissions on fdbconf: ${FDBCONF}"
let status="${status} + 1"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! chmod 0664 "${FDBCONF}"; then
log "Failed to set permissions on fdbconf: ${FDBCONF}"
let status="${status} + 1"
fi
return ${status}
return ${status}
}
# Stop the Cluster from running.
function stopCluster
{
local status=0
local status=0
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${CLUSTERSTRING}" "${FDBSERVERID}" >> "${AUDITLOG}"
fi
if [ -z "${FDBSERVERID}" ]; then
log 'FDB Server process is not defined'
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}"; then
log "Failed to locate FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${CLUSTERSTRING}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log"
then
# Ensure that process is dead
if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then
log "Killed cluster (${FDBSERVERID}) via cli"
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed"
fi
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to forcibly kill FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID})"
fi
return "${status}"
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" "${FDBSERVERID}" >> "${AUDITLOG}"
fi
if [ -z "${FDBSERVERID}" ]; then
log 'FDB Server process is not defined'
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}"; then
log "Failed to locate FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${FDBCLUSTERTEXT}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log"
then
# Ensure that process is dead
if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then
log "Killed cluster (${FDBSERVERID}) via cli"
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed"
fi
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to forcibly kill FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID})"
fi
return "${status}"
}
# Start the server running.
function startFdbServer
{
local status=0
local status=0
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${CLUSTERSTRING}" >> "${AUDITLOG}"
fi
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" >> "${AUDITLOG}"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! displayMessage "Starting Fdb Server"
then
log 'Failed to display user message'
let status="${status} + 1"
if ! displayMessage "Starting Fdb Server"
then
log 'Failed to display user message'
let status="${status} + 1"
else
"${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${CLUSTERSTRING}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" &
fdbpid=$!
fdbrc=$?
if [ $fdbrc -ne 0 ]
then
log "Failed to start FDB Server"
let status="${status} + 1"
else
FDBSERVERID="${fdbpid}"
fi
fi
else
"${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${FDBCLUSTERTEXT}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" &
if [ "${?}" -ne 0 ]
then
log "Failed to start FDB Server"
let status="${status} + 1"
else
FDBSERVERID="${!}"
fi
fi
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server start failed because no process"
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}" ; then
log "FDB Server start failed because process terminated unexpectedly"
let status="${status} + 1"
fi
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server start failed because no process"
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}" ; then
log "FDB Server start failed because process terminated unexpectedly"
let status="${status} + 1"
fi
return ${status}
return ${status}
}
function getStatus
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get status from fdbcli'
let status="${status} + 1"
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get status from fdbcli'
let status="${status} + 1"
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
fi
return ${status}
return ${status}
}
# Verify that the cluster is available.
function verifyAvailable
{
local status=0
local status=0
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server process is not defined."
let status="${status} + 1"
# Verify that the server is running.
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Display user message.
elif ! displayMessage "Checking cluster availability"
then
log 'Failed to display user message'
let status="${status} + 1"
# Determine if status json says the database is available.
else
avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
log "Avail value: ${avail}" "${DEBUGLEVEL}"
if [[ -n "${avail}" ]] ; then
:
else
let status="${status} + 1"
fi
fi
return "${status}"
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server process is not defined."
let status="${status} + 1"
# Verify that the server is running.
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Display user message.
elif ! displayMessage "Checking cluster availability"
then
log 'Failed to display user message'
let status="${status} + 1"
# Determine if status json says the database is available.
else
avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
log "Avail value: ${avail}" "${DEBUGLEVEL}"
if [[ -n "${avail}" ]] ; then
:
else
let status="${status} + 1"
fi
fi
return "${status}"
}
# Configure the database on the server.
function createDatabase
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
# Ensure that the server is running
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process: (${FDBSERVERID}) is not running"
let status="${status} + 1"
if [ "${status}" -ne 0 ]; then
:
# Ensure that the server is running
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process: (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Display user message
elif ! displayMessage "Creating database"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
then
log 'Failed to create fdbclient.log'
let status="${status} + 1"
elif ! getStatus
then
log 'Failed to get status'
let status="${status} + 1"
# Display user message
elif ! displayMessage "Creating database"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
then
log 'Failed to create fdbclient.log'
let status="${status} + 1"
elif ! getStatus
then
log 'Failed to get status'
let status="${status} + 1"
# Configure the database.
else
"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"
# Configure the database.
else
"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"
if ! displayMessage "Checking if config succeeded"
then
log 'Failed to display user message.'
fi
if ! displayMessage "Checking if config succeeded"
then
log 'Failed to display user message.'
fi
iteration=0
while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable
do
log "Database not created (iteration ${iteration})."
let iteration="${iteration} + 1"
done
iteration=0
while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable
do
log "Database not created (iteration ${iteration})."
let iteration="${iteration} + 1"
done
if ! verifyAvailable
then
log "Failed to create database via cli"
getStatus
cat "${LOGDIR}/fdbclient.log"
log "Ignoring -- moving on"
#let status="${status} + 1"
fi
fi
if ! verifyAvailable
then
log "Failed to create database via cli"
getStatus
cat "${LOGDIR}/fdbclient.log"
log "Ignoring -- moving on"
#let status="${status} + 1"
fi
fi
return ${status}
return ${status}
}
# Begin the local cluster from scratch.
function startCluster
{
local status=0
local status=0
if [ "${status}" -ne 0 ]; then
:
elif ! createDirectories
then
log "Could not create directories."
let status="${status} + 1"
elif ! createClusterFile
then
log "Could not create cluster file."
let status="${status} + 1"
elif ! startFdbServer
then
log "Could not start FDB server."
let status="${status} + 1"
elif ! createDatabase
then
log "Could not create database."
let status="${status} + 1"
fi
if [ "${status}" -ne 0 ]; then
:
elif ! createDirectories
then
log "Could not create directories."
let status="${status} + 1"
elif ! createClusterFile
then
log "Could not create cluster file."
let status="${status} + 1"
elif ! startFdbServer
then
log "Could not start FDB server."
let status="${status} + 1"
elif ! createDatabase
then
log "Could not create database."
let status="${status} + 1"
fi
return ${status}
return ${status}
}

View File

@ -263,9 +263,9 @@ See :ref:`developer-guide-programming-with-futures` for further (language-indepe
.. function:: fdb_error_t fdb_future_block_until_ready(FDBFuture* future)
Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. out of memory or other operating system resources).
Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. a detected deadlock, or exhaustion of memory or other operating system resources).
.. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock.
.. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock. In some cases the client can detect the deadlock, in which case this function returns a ``blocked_from_network_thread`` error instead of blocking.
.. function:: fdb_bool_t fdb_future_is_ready(FDBFuture* future)

View File

@ -114,8 +114,12 @@ FoundationDB may return the following error codes from API functions. If you nee
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| transaction_read_only | 2023| Attempted to commit a transaction specified as read-only |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| invalid_cache_eviction_policy | 2024| Invalid cache eviction policy, only random and lru are supported |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| network_cannot_be_restarted | 2025| Network can only be started once |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| blocked_from_network_thread | 2026| Detected a deadlock in a callback called from the network thread |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| incompatible_protocol_version | 2100| Incompatible protocol version |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| transaction_too_large | 2101| Transaction exceeds byte limit |

View File

@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-6.3.6.pkg <https://www.foundationdb.org/downloads/6.3.6/macOS/installers/FoundationDB-6.3.6.pkg>`_
* `FoundationDB-6.3.8.pkg <https://www.foundationdb.org/downloads/6.3.8/macOS/installers/FoundationDB-6.3.8.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-6.3.6-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.6/ubuntu/installers/foundationdb-clients_6.3.6-1_amd64.deb>`_
* `foundationdb-server-6.3.6-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.6/ubuntu/installers/foundationdb-server_6.3.6-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-clients_6.3.8-1_amd64.deb>`_
* `foundationdb-server-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-server_6.3.8-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-6.3.6-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.6/rhel6/installers/foundationdb-clients-6.3.6-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.3.6-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.6/rhel6/installers/foundationdb-server-6.3.6-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-clients-6.3.8-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-server-6.3.8-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-6.3.6-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.6/rhel7/installers/foundationdb-clients-6.3.6-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.3.6-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.6/rhel7/installers/foundationdb-server-6.3.6-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-clients-6.3.8-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-server-6.3.8-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-6.3.6-x64.msi <https://www.foundationdb.org/downloads/6.3.6/windows/installers/foundationdb-6.3.6-x64.msi>`_
* `foundationdb-6.3.8-x64.msi <https://www.foundationdb.org/downloads/6.3.8/windows/installers/foundationdb-6.3.8-x64.msi>`_
API Language Bindings
=====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package:
* `foundationdb-6.3.6.tar.gz <https://www.foundationdb.org/downloads/6.3.6/bindings/python/foundationdb-6.3.6.tar.gz>`_
* `foundationdb-6.3.8.tar.gz <https://www.foundationdb.org/downloads/6.3.8/bindings/python/foundationdb-6.3.8.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-6.3.6.gem <https://www.foundationdb.org/downloads/6.3.6/bindings/ruby/fdb-6.3.6.gem>`_
* `fdb-6.3.8.gem <https://www.foundationdb.org/downloads/6.3.8/bindings/ruby/fdb-6.3.8.gem>`_
Java 8+
-------
* `fdb-java-6.3.6.jar <https://www.foundationdb.org/downloads/6.3.6/bindings/java/fdb-java-6.3.6.jar>`_
* `fdb-java-6.3.6-javadoc.jar <https://www.foundationdb.org/downloads/6.3.6/bindings/java/fdb-java-6.3.6-javadoc.jar>`_
* `fdb-java-6.3.8.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8.jar>`_
* `fdb-java-6.3.8-javadoc.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8-javadoc.jar>`_
Go 1.11+
--------

View File

@ -2,6 +2,10 @@
Release Notes
#############
6.2.26
======
* Attempt to detect when calling :func:`fdb_future_block_until_ready` would cause a deadlock, and return a ``blocked_from_network_thread`` error if the call would definitely deadlock.
6.2.25
======

View File

@ -2,7 +2,7 @@
Release Notes
#############
6.3.6
6.3.8
=====
Features
@ -110,6 +110,8 @@ Other Changes
* Blob backup URL parameter ``request_timeout`` changed to ``request_timeout_min``, with prior name still supported. `(PR #3533) <https://github.com/apple/foundationdb/pull/3533>`_
* Support query command in backup CLI that allows users to query restorable files by key ranges. [6.3.6] `(PR #3703) <https://github.com/apple/foundationdb/pull/3703>`_
* Report missing old tlogs information when in recovery before storage servers are fully recovered. [6.3.6] `(PR #3706) <https://github.com/apple/foundationdb/pull/3706>`_
* Updated OpenSSL to version 1.1.1h. [6.3.7] `(PR #3809) <https://github.com/apple/foundationdb/pull/3809>`_
* Lowered the amount of time a watch will remain registered on a storage server from 900 seconds to 30 seconds. [6.3.8] `(PR #3833) <https://github.com/apple/foundationdb/pull/3833>`_
Fixes from previous versions
----------------------------
@ -127,6 +129,7 @@ Fixes only impacting 6.3.0+
* All storage class processes attempted to connect to the same coordinator. [6.3.2] `(PR #3361) <https://github.com/apple/foundationdb/pull/3361>`_
* Adjusted the proxy load balancing algorithm to be based on the CPU usage of the process instead of the number of requests processed. [6.3.5] `(PR #3653) <https://github.com/apple/foundationdb/pull/3653>`_
* Only return the error code ``batch_transaction_throttled`` for API versions greater than or equal to 630. [6.3.6] `(PR #3799) <https://github.com/apple/foundationdb/pull/3799>`_
* The fault tolerance calculation in status did not take into account region configurations. [6.3.8] `(PR #3836) <https://github.com/apple/foundationdb/pull/3836>`_
Earlier release notes
---------------------

View File

@ -1063,10 +1063,10 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
if (statusObjConfig.has("regions")) {
outputString += "\n Regions: ";
regions = statusObjConfig["regions"].get_array();
bool isPrimary = false;
std::vector<std::string> regionSatelliteDCs;
std::string regionDC;
for (StatusObjectReader region : regions) {
bool isPrimary = false;
std::vector<std::string> regionSatelliteDCs;
std::string regionDC;
for (StatusObjectReader dc : region["datacenters"].get_array()) {
if (!dc.has("satellite")) {
regionDC = dc["id"].get_str();

View File

@ -133,15 +133,19 @@ struct DatabaseConfiguration {
}
//Killing an entire datacenter counts as killing one zone in modes that support it
int32_t maxZoneFailuresTolerated() const {
int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const {
int worstSatellite = regions.size() ? std::numeric_limits<int>::max() : 0;
int regionsWithNonNegativePriority = 0;
for(auto& r : regions) {
if(r.priority >= 0) {
regionsWithNonNegativePriority++;
}
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
if(r.satelliteTLogUsableDcsFallback > 0) {
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
}
}
if(usableRegions > 1 && worstSatellite > 0) {
if(usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 && (!forAvailability || regionsWithNonNegativePriority > 1)) {
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1);
} else if(worstSatellite > 0) {
return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, storageTeamSize - 1);

View File

@ -104,7 +104,7 @@ void ClientKnobs::initialize(bool randomize) {
init( WATCH_POLLING_TIME, 1.0 ); if( randomize && BUGGIFY ) WATCH_POLLING_TIME = 5.0;
init( NO_RECENT_UPDATES_DURATION, 20.0 ); if( randomize && BUGGIFY ) NO_RECENT_UPDATES_DURATION = 0.1;
init( FAST_WATCH_TIMEOUT, 20.0 ); if( randomize && BUGGIFY ) FAST_WATCH_TIMEOUT = 1.0;
init( WATCH_TIMEOUT, 900.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;
init( WATCH_TIMEOUT, 30.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;
// Core
init( CORE_VERSIONSPERSECOND, 1e6 );

View File

@ -46,7 +46,6 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( RECOVERY_TLOG_SMART_QUORUM_DELAY, 0.25 ); if( randomize && BUGGIFY ) RECOVERY_TLOG_SMART_QUORUM_DELAY = 0.0; // smaller might be better for bug amplification
init( TLOG_STORAGE_MIN_UPDATE_INTERVAL, 0.5 );
init( BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL, 30 );
init( UNFLUSHED_DATA_RATIO, 0.05 ); if( randomize && BUGGIFY ) UNFLUSHED_DATA_RATIO = 0.0;
init( DESIRED_TOTAL_BYTES, 150000 ); if( randomize && BUGGIFY ) DESIRED_TOTAL_BYTES = 10000;
init( DESIRED_UPDATE_BYTES, 2*DESIRED_TOTAL_BYTES );
init( UPDATE_DELAY, 0.001 );

View File

@ -45,7 +45,6 @@ public:
double RECOVERY_TLOG_SMART_QUORUM_DELAY; // smaller might be better for bug amplification
double TLOG_STORAGE_MIN_UPDATE_INTERVAL;
double BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL;
double UNFLUSHED_DATA_RATIO;
int DESIRED_TOTAL_BYTES;
int DESIRED_UPDATE_BYTES;
double UPDATE_DELAY;

View File

@ -658,7 +658,7 @@ void handleUpdateRateRequest(RestoreUpdateRateRequest req, Reference<RestoreAppl
}
ACTOR static Future<Void> traceRate(const char* context, Reference<ApplierBatchData> batchData, int batchIndex,
UID nodeID, NotifiedVersion* finishedVB) {
UID nodeID, NotifiedVersion* finishedVB, bool once = false) {
loop {
if ((finishedVB->get() != batchIndex - 1) || !batchData.isValid()) {
break;
@ -672,6 +672,9 @@ ACTOR static Future<Void> traceRate(const char* context, Reference<ApplierBatchD
.detail("TargetBytesMB", batchData->targetWriteRateMB)
.detail("InflightBytesMB", batchData->applyingDataBytes)
.detail("ReceivedBytes", batchData->receivedBytes);
if (once) {
break;
}
wait(delay(5.0));
}
@ -719,8 +722,9 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
// Multiple actors can wait on req.batchIndex-1;
// Avoid setting finishedBatch when finishedBatch > req.batchIndex
if (self->finishedBatch.get() == req.batchIndex - 1) {
batchData->rateTracer = traceRate("FastRestoreApplierTransactionRateControlDone", batchData, req.batchIndex,
self->id(), &self->finishedBatch); // Track the last rate info
batchData->rateTracer =
traceRate("FastRestoreApplierTransactionRateControlDone", batchData, req.batchIndex, self->id(),
&self->finishedBatch, true /*print once*/); // Track the last rate info
self->finishedBatch.set(req.batchIndex);
// self->batch[req.batchIndex]->vbState = ApplierVersionBatchState::DONE;
// Free memory for the version batch

View File

@ -115,7 +115,7 @@ ACTOR Future<Void> startRestoreController(Reference<RestoreWorkerData> controlle
// recruitRestoreRoles must come after controllerWorker has finished collectWorkerInterface
wait(recruitRestoreRoles(controllerWorker, self));
self->addActor.send(updateHeartbeatTime(self));
// self->addActor.send(updateHeartbeatTime(self));
self->addActor.send(checkRolesLiveness(self));
self->addActor.send(updateProcessMetrics(self));
self->addActor.send(traceProcessMetrics(self, "RestoreController"));

View File

@ -1700,25 +1700,28 @@ static int getExtraTLogEligibleZones(const vector<WorkerDetails>& workers, const
if(configuration.regions.size() == 0) {
return allZones.size() - std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize);
}
int extraTlogEligibleZones = configuration.usableRegions == 1 ? 0 : std::numeric_limits<int>::max();
int extraTlogEligibleZones = 0;
int regionsWithNonNegativePriority = 0;
for(auto& region : configuration.regions) {
int eligible = dcId_zone[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) );
//FIXME: does not take into account fallback satellite policies
if(region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
int totalSatelliteEligible = 0;
for(auto& sat : region.satellites) {
totalSatelliteEligible += dcId_zone[sat.dcId].size();
if( region.priority >= 0 ) {
int eligible = dcId_zone[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) );
//FIXME: does not take into account fallback satellite policies
if(region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
int totalSatelliteEligible = 0;
for(auto& sat : region.satellites) {
totalSatelliteEligible += dcId_zone[sat.dcId].size();
}
eligible = std::min<int>( eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor );
}
eligible = std::min<int>( eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor );
}
if( configuration.usableRegions == 1 ) {
if( region.priority >= 0 ) {
extraTlogEligibleZones = std::max( extraTlogEligibleZones, eligible );
if(eligible >= 0) {
regionsWithNonNegativePriority++;
}
} else {
extraTlogEligibleZones = std::min( extraTlogEligibleZones, eligible );
extraTlogEligibleZones = std::max( extraTlogEligibleZones, eligible );
}
}
if(regionsWithNonNegativePriority > 1) {
extraTlogEligibleZones++;
}
return extraTlogEligibleZones;
}
@ -2020,7 +2023,8 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
log_replication_factor, log_write_anti_quorum, log_fault_tolerance, remote_log_replication_factor,
remote_log_fault_tolerance;
int maxFaultTolerance = 0;
int minFaultTolerance = 1000;
int localSetsWithNonNegativeFaultTolerance = 0;
for (int i = 0; i < tLogs.size(); i++) {
int failedLogs = 0;
@ -2037,9 +2041,15 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
failedLogs++;
}
}
// The log generation's fault tolerance is the maximum tlog fault tolerance of each region.
maxFaultTolerance =
std::max(maxFaultTolerance, tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs);
if (tLogs[i].isLocal) {
int currentFaultTolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs;
if(currentFaultTolerance >= 0) {
localSetsWithNonNegativeFaultTolerance++;
}
minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance);
}
if (tLogs[i].isLocal && tLogs[i].locality == tagLocalitySatellite) {
sat_log_replication_factor = tLogs[i].tLogReplicationFactor;
sat_log_write_anti_quorum = tLogs[i].tLogWriteAntiQuorum;
@ -2053,11 +2063,18 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
remote_log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - failedLogs;
}
}
*logFaultTolerance = std::min(*logFaultTolerance, maxFaultTolerance);
if(minFaultTolerance == 1000) {
//just in case we do not have any tlog sets
minFaultTolerance = 0;
}
if(localSetsWithNonNegativeFaultTolerance > 1) {
minFaultTolerance++;
}
*logFaultTolerance = std::min(*logFaultTolerance, minFaultTolerance);
statusObj["log_interfaces"] = logsObj;
// We may lose logs in this log generation, in which case storage servers may never be able to catch up with
// this log generation.
statusObj["possibly_losing_data"] = maxFaultTolerance < 0;
statusObj["possibly_losing_data"] = minFaultTolerance < 0;
if (sat_log_replication_factor.present())
statusObj["satellite_log_replication_factor"] = sat_log_replication_factor.get();
@ -2102,12 +2119,13 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance, Reference<AsyncVar<S
static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
ServerCoordinators coordinators,
std::vector<WorkerDetails>& workers, int extraTlogEligibleZones,
int minReplicasRemaining, int oldLogFaultTolerance,
int minReplicasRemaining, int oldLogFaultTolerance,
int fullyReplicatedRegions,
bool underMaintenance) {
JsonBuilderObject statusObj;
// without losing data
int32_t maxZoneFailures = configuration.maxZoneFailuresTolerated();
int32_t maxZoneFailures = configuration.maxZoneFailuresTolerated(fullyReplicatedRegions, false);
if(underMaintenance) {
maxZoneFailures--;
}
@ -2145,8 +2163,14 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
// oldLogFaultTolerance is the maximum number of failures we can tolerate without losing log data. -1 means we lose data or availability.
zoneFailuresWithoutLosingData = std::max(std::min(zoneFailuresWithoutLosingData, oldLogFaultTolerance), -1);
statusObj["max_zone_failures_without_losing_data"] = zoneFailuresWithoutLosingData;
int32_t maxAvaiabilityZoneFailures = configuration.maxZoneFailuresTolerated(fullyReplicatedRegions, true);
if(underMaintenance) {
maxAvaiabilityZoneFailures--;
}
statusObj["max_zone_failures_without_losing_availability"] =
std::max(std::min(extraTlogEligibleZones, zoneFailuresWithoutLosingData), -1);
std::max(std::min(maxAvaiabilityZoneFailures,std::min(extraTlogEligibleZones, zoneFailuresWithoutLosingData)), -1);
return statusObj;
}
@ -2323,7 +2347,7 @@ ACTOR Future<JsonBuilderObject> lockedStatusFetcher(Reference<AsyncVar<ServerDBI
return statusObj;
}
ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray* messages) {
ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, int* fullyReplicatedRegions, JsonBuilderArray* messages) {
state ReadYourWritesTransaction tr(cx);
state Future<Void> readTimeout = delay(5); // so that we won't loop forever
@ -2334,12 +2358,17 @@ ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray*
}
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
Optional<Value> res = wait(timeoutError(tr.get(primaryDatacenterKey), 5));
if (!res.present()) {
state Future<Standalone<RangeResultRef>> fReplicaKeys = tr.getRange(datacenterReplicasKeys, CLIENT_KNOBS->TOO_MANY);
state Future<Optional<Value>> fPrimaryDatacenterKey = tr.get(primaryDatacenterKey);
wait(timeoutError(success(fPrimaryDatacenterKey) && success(fReplicaKeys), 5));
*fullyReplicatedRegions = fReplicaKeys.get().size();
if (!fPrimaryDatacenterKey.get().present()) {
messages->push_back(
JsonString::makeMessage("primary_dc_missing", "Unable to determine primary datacenter."));
}
return res;
return fPrimaryDatacenterKey.get();
} catch (Error& e) {
if (e.code() == error_code_timed_out) {
messages->push_back(
@ -2533,7 +2562,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
state Future<ErrorOr<vector<std::pair<GrvProxyInterface, EventMap>>>> grvProxyFuture = errorOr(getGrvProxiesAndMetrics(db, address_workers));
state int minReplicasRemaining = -1;
state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &messages);
state int fullyReplicatedRegions = -1;
state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &fullyReplicatedRegions, &messages);
std::vector<Future<JsonBuilderObject>> futures2;
futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
@ -2541,6 +2571,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
futures2.push_back(clusterSummaryStatisticsFetcher(pMetrics, storageServerFuture, tLogFuture, &status_incomplete_reasons));
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
wait(success(primaryDCFO));
int logFaultTolerance = 100;
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
@ -2551,13 +2582,12 @@ ACTOR Future<StatusReply> clusterGetStatus(
int extraTlogEligibleZones = getExtraTLogEligibleZones(workers, configuration.get());
statusObj["fault_tolerance"] = faultToleranceStatusFetcher(
configuration.get(), coordinators, workers, extraTlogEligibleZones, minReplicasRemaining,
logFaultTolerance, loadResult.present() && loadResult.get().healthyZone.present());
logFaultTolerance, fullyReplicatedRegions, loadResult.present() && loadResult.get().healthyZone.present());
}
state JsonBuilderObject configObj =
configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
wait(success(primaryDCFO));
if (primaryDCFO.get().present()) {
statusObj["active_primary_dc"] = primaryDCFO.get().get();
}

View File

@ -1020,6 +1020,11 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
return Void();
};
// Pessimistic estimate of the number of overhead bytes used by each
// watch. Watch key references are stored in an AsyncMap<Key,bool>, and actors
// must be kept alive until the watch is finished.
static constexpr size_t WATCH_OVERHEAD_BYTES = 1000;
ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req, SpanID parent ) {
state Location spanLocation = "SS:WatchValueImpl"_loc;
state Span span(spanLocation, { parent });
@ -1070,7 +1075,7 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req,
}
++data->numWatches;
data->watchBytes += ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
data->watchBytes += (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
try {
if(latest < minVersion) {
// If the version we read is less than minVersion, then we may fail to be notified of any changes that occur up to or including minVersion
@ -1083,10 +1088,10 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req,
}
wait(watchFuture);
--data->numWatches;
data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
data->watchBytes -= (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
} catch( Error &e ) {
--data->numWatches;
data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
data->watchBytes -= (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
throw;
}
} catch( Error &e ) {

View File

@ -85,11 +85,12 @@ Error systemErrorCodeToError();
inline Error actor_cancelled() { return Error( error_code_operation_cancelled ); }
enum { error_code_actor_cancelled = error_code_operation_cancelled };
extern Error internal_error_impl( const char* file, int line );
extern Error internal_error_impl(const char* file, int line);
extern Error internal_error_impl(const char* msg, const char* file, int line);
extern Error internal_error_impl(const char * a_nm, long long a, const char * op_nm, const char * b_nm, long long b, const char * file, int line);
#define inernal_error_msg(msg) internal_error_impl(msg, __FILE__, __LINE__)
#define internal_error() internal_error_impl(__FILE__, __LINE__)
#define internal_error_msg(msg) internal_error_impl(msg, __FILE__, __LINE__)
extern bool isAssertDisabled( int line );
//#define ASSERT( condition ) ((void)0)

View File

@ -204,7 +204,16 @@ public:
}
}
ThreadSingleAssignmentVarBase() : status(Unset), callback(nullptr), valueReferenceCount(0) {} //, referenceCount(1) {}
// Waits for this variable to become ready, but refuses to wait on the main thread.
//
// If the variable is already ready this returns immediately. Otherwise, when the
// caller is on the thread reported by g_network->isOnMainThread(), it throws
// blocked_from_network_thread instead of waiting.
void blockUntilReadyCheckOnMainThread() {
	if (isReady()) {
		// Nothing to wait for.
		return;
	}
	if (g_network->isOnMainThread()) {
		throw blocked_from_network_thread();
	}
	// NOTE(review): BlockCallback is defined outside this view; presumably its
	// constructor blocks until the variable is set — confirm against its definition.
	BlockCallback cb(*this);
}
ThreadSingleAssignmentVarBase() : status(Unset), callback(NULL), valueReferenceCount(0) {} //, referenceCount(1) {}
~ThreadSingleAssignmentVarBase() {
this->mutex.assertNotEntered();
@ -310,12 +319,12 @@ public:
}
virtual void cancel() {
// Cancels the action and decrements the reference count by 1
// The if statement is just an optimization. It's ok if we take the wrong path due to a race
if(isReadyUnsafe())
delref();
else
onMainThreadVoid( [this](){ this->cancelFuture.cancel(); this->delref(); }, nullptr );
onMainThreadVoid(
[this]() {
this->cancelFuture.cancel();
this->delref();
},
nullptr);
}
void releaseMemory() {
@ -329,6 +338,7 @@ private:
int32_t valueReferenceCount;
protected:
// The caller of any of these *Unsafe functions should be holding |mutex|
bool isReadyUnsafe() const { return status >= Set; }
bool isErrorUnsafe() const { return status == ErrorSet; }
bool canBeSetUnsafe() const { return status == Unset; }
@ -426,6 +436,8 @@ public:
sav->blockUntilReady();
}
// Forwards to sav->blockUntilReadyCheckOnMainThread().
void blockUntilReadyCheckOnMainThread() {
	sav->blockUntilReadyCheckOnMainThread();
}
bool isValid() const {
return sav != 0;
}

View File

@ -145,6 +145,7 @@ ERROR( environment_variable_network_option_failed, 2022, "Environment variable n
ERROR( transaction_read_only, 2023, "Attempted to commit a transaction specified as read-only" )
ERROR( invalid_cache_eviction_policy, 2024, "Invalid cache eviction policy, only random and lru are supported" )
ERROR( network_cannot_be_restarted, 2025, "Network can only be started once" )
ERROR( blocked_from_network_thread, 2026, "Detected a deadlock in a callback called from the network thread" )
ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" )
ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" )

View File

@ -32,7 +32,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
<Product Name='$(var.Title)'
Id='{1377F0A0-D1AC-4B72-ADA7-7180D002A307}'
Id='{0AB36B0F-2187-4ECD-9E7E-983EDD966CEB}'
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
Version='$(var.Version)'
Manufacturer='$(var.Manufacturer)'