Merge remote-tracking branch 'origin/master' into paxos-config-db

commit 658233d0e0
@@ -77,19 +77,37 @@ add_dependencies(packages python_package)
 if (NOT WIN32 AND NOT OPEN_FOR_IDE)
   add_fdbclient_test(
-    NAME fdbcli_tests
+    NAME single_process_fdbcli_tests
     COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
-            ${CMAKE_BINARY_DIR}/bin/fdbcli
+            ${CMAKE_BINARY_DIR}
             @CLUSTER_FILE@
-            1
   )
   add_fdbclient_test(
     NAME multi_process_fdbcli_tests
     PROCESS_NUMBER 5
     TEST_TIMEOUT 120 # The test can take close to 1 minute sometimes; set the timeout to 2 minutes to be safe
     COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
-            ${CMAKE_BINARY_DIR}/bin/fdbcli
+            ${CMAKE_BINARY_DIR}
             @CLUSTER_FILE@
             5
   )
+  if (TARGET external_client) # external_client copies fdb_c to bindings/c/libfdb_c.so
+    add_fdbclient_test(
+      NAME single_process_external_client_fdbcli_tests
+      COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
+              ${CMAKE_BINARY_DIR}
+              @CLUSTER_FILE@
+              --external-client-library ${CMAKE_BINARY_DIR}/bindings/c/libfdb_c.so
+    )
+    add_fdbclient_test(
+      NAME multi_process_external_client_fdbcli_tests
+      PROCESS_NUMBER 5
+      TEST_TIMEOUT 120 # The test can take close to 1 minute sometimes; set the timeout to 2 minutes to be safe
+      COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
+              ${CMAKE_BINARY_DIR}
+              @CLUSTER_FILE@
+              5
+              --external-client-library ${CMAKE_BINARY_DIR}/bindings/c/libfdb_c.so
+    )
+  endif()
 endif()
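For reference, the four registrations above now hand the test script the build directory (the script locates fdbcli itself), the cluster file, an optional process count, and optionally the external client library. A minimal sketch of the resulting command lines (the paths here are placeholders, not values from this diff):

```python
import subprocess

# Placeholder paths; CMake substitutes the real ones at configure time.
script = "bindings/python/tests/fdbcli_tests.py"
build_dir = "/path/to/build"
cluster_file = "/path/to/fdb.cluster"

# single_process_fdbcli_tests: process_number now defaults to 1, so the old
# trailing "1" argument is gone.
subprocess.run([script, build_dir, cluster_file], check=True)

# multi_process_fdbcli_tests: five fdbserver processes.
subprocess.run([script, build_dir, cluster_file, "5"], check=True)

# external-client variants append the copied libfdb_c.so.
subprocess.run(
    [script, build_dir, cluster_file, "5",
     "--external-client-library", build_dir + "/bindings/c/libfdb_c.so"],
    check=True)
```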
@@ -1,14 +1,17 @@
 #!/usr/bin/env python3

 import sys
+import os
 import subprocess
 import logging
 import functools
 import json
 import time
 import random
+from argparse import ArgumentParser, RawDescriptionHelpFormatter

-def enable_logging(level=logging.ERROR):
+def enable_logging(level=logging.DEBUG):
     """Enable logging in the function with the specified logging level

     Args:
@@ -16,7 +19,7 @@ def enable_logging(level=logging.ERROR):
     """
     def func_decorator(func):
         @functools.wraps(func)
-        def wrapper(*args,**kwargs):
+        def wrapper(*args, **kwargs):
             # initialize logger
             logger = logging.getLogger(func.__name__)
             logger.setLevel(level)
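The `enable_logging` decorator factory above (its default level now DEBUG) injects a per-test logger as the wrapped function's first argument. A self-contained sketch of the same pattern outside the fdbcli harness; the handler setup is an assumption, since the diff elides that part of the wrapper:

```python
import functools
import logging

def enable_logging(level=logging.DEBUG):
    """Decorator factory: inject a logger named after the function."""
    def func_decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            logger = logging.getLogger(func.__name__)
            logger.setLevel(level)
            if not logger.handlers:  # assumed setup; avoids duplicate handlers
                logger.addHandler(logging.StreamHandler())
            return func(logger, *args, **kwargs)
        return wrapper
    return func_decorator

@enable_logging()
def demo(logger):
    logger.debug("called with an injected logger")

demo()  # callers never pass the logger; the decorator supplies it
```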
@@ -32,6 +35,7 @@ def enable_logging(level=logging.ERROR):
         return wrapper
     return func_decorator

+
 def run_fdbcli_command(*args):
     """run the fdbcli statement: fdbcli --exec '<arg1> <arg2> ... <argN>'.

@@ -39,7 +43,8 @@ def run_fdbcli_command(*args):
         string: Console output from fdbcli
     """
     commands = command_template + ["{}".format(' '.join(args))]
-    return subprocess.run(commands, stdout=subprocess.PIPE).stdout.decode('utf-8').strip()
+    return subprocess.run(commands, stdout=subprocess.PIPE, env=fdbcli_env).stdout.decode('utf-8').strip()

+
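The only change to `run_fdbcli_command` is threading `fdbcli_env` (built in `__main__` later in this diff) into `subprocess.run`, so the external-client network options reach every fdbcli child process. A minimal sketch of the mechanism, with a placeholder library path:

```python
import os
import subprocess

# Copy the parent environment, then overlay the client-routing options.
fdbcli_env = os.environ.copy()
fdbcli_env['FDB_NETWORK_OPTION_DISABLE_LOCAL_CLIENT'] = ''
fdbcli_env['FDB_NETWORK_OPTION_EXTERNAL_CLIENT_LIBRARY'] = '/path/to/libfdb_c.so'

# Any child started with env=fdbcli_env sees the overlay; here the POSIX `env`
# utility stands in for fdbcli.
out = subprocess.run(['env'], stdout=subprocess.PIPE, env=fdbcli_env).stdout
print([l for l in out.decode('utf-8').splitlines() if 'FDB_NETWORK' in l])
```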
 def run_fdbcli_command_and_get_error(*args):
     """run the fdbcli statement: fdbcli --exec '<arg1> <arg2> ... <argN>'.

@@ -48,7 +53,8 @@ def run_fdbcli_command_and_get_error(*args):
         string: Stderr output from fdbcli
     """
     commands = command_template + ["{}".format(' '.join(args))]
-    return subprocess.run(commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.decode('utf-8').strip()
+    return subprocess.run(commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=fdbcli_env).stderr.decode('utf-8').strip()

+
 @enable_logging()
 def advanceversion(logger):
@@ -72,6 +78,7 @@ def advanceversion(logger):
     logger.debug("Read version: {}".format(version4))
     assert version4 >= version3

+
 @enable_logging()
 def maintenance(logger):
     # expected fdbcli output when running 'maintenance' while there's no ongoing maintenance
@@ -94,6 +101,7 @@ def maintenance(logger):
     output3 = run_fdbcli_command('maintenance')
     assert output3 == no_maintenance_output

+
 @enable_logging()
 def setclass(logger):
     output1 = run_fdbcli_command('setclass')
@@ -108,11 +116,11 @@ def setclass(logger):
     # check class source
     assert 'command_line' in class_type_line_1
     # set class to a random valid type
     class_types = ['storage', 'storage', 'transaction', 'resolution',
                    'commit_proxy', 'grv_proxy', 'master', 'stateless', 'log',
                    'router', 'cluster_controller', 'fast_restore', 'data_distributor',
                    'coordinator', 'ratekeeper', 'storage_cache', 'backup'
                    ]
     random_class_type = random.choice(class_types)
     logger.debug("Change to type: {}".format(random_class_type))
     run_fdbcli_command('setclass', network_address, random_class_type)
@@ -134,6 +142,7 @@ def setclass(logger):
     logger.debug(class_type_line_3)
     assert class_type_line_3 == class_type_line_1

+
 @enable_logging()
 def lockAndUnlock(logger):
     # lock an unlocked database, should be successful
@@ -148,7 +157,7 @@ def lockAndUnlock(logger):
     output2 = run_fdbcli_command_and_get_error("lock")
     assert output2 == 'ERROR: Database is locked (1038)'
     # unlock the database
-    process = subprocess.Popen(command_template + ['unlock ' + lock_uid], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    process = subprocess.Popen(command_template + ['unlock ' + lock_uid], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=fdbcli_env)
     line1 = process.stdout.readline()
     # The random passphrase we must confirm to proceed with the unlocking
     line2 = process.stdout.readline()
@@ -159,6 +168,7 @@ def lockAndUnlock(logger):
     assert output3.decode('utf-8').strip() == 'Database unlocked.'
     assert not get_value_from_status_json(True, 'cluster', 'database_lock_state', 'locked')

+
 @enable_logging()
 def kill(logger):
     output1 = run_fdbcli_command('kill')
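The unlock path above drives fdbcli interactively: Popen with piped stdin/stdout (still under `fdbcli_env`), read the prompted passphrase, then write it back to confirm. A sketch of that read/confirm round trip, using POSIX `cat` as a stand-in for fdbcli:

```python
import subprocess

# Stand-in for: subprocess.Popen(command_template + ['unlock ' + lock_uid], ...)
process = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
process.stdin.write(b'passphrase\n')  # echo the prompted passphrase back
process.stdin.flush()
line = process.stdout.readline()      # fdbcli would print its confirmation here
rest, _ = process.communicate()       # close stdin and drain remaining output
print(line.decode('utf-8').strip())
```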
@@ -168,11 +178,11 @@ def kill(logger):
     address = lines[1]
     logger.debug("Address: {}".format(address))
     old_generation = get_value_from_status_json(False, 'cluster', 'generation')
     # This is currently an issue with fdbcli,
     # where you need to first run 'kill' to initialize the process list
     # and then specify the certain process to kill
-    process = subprocess.Popen(command_template[:-1], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    process = subprocess.Popen(command_template[:-1], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=fdbcli_env)
     #
     output2, err = process.communicate(input='kill; kill {}\n'.format(address).encode())
     logger.debug(output2)
     # wait for a second for the cluster recovery
@@ -181,6 +191,7 @@ def kill(logger):
     logger.debug("Old: {}, New: {}".format(old_generation, new_generation))
     assert new_generation > old_generation

+
 @enable_logging()
 def suspend(logger):
     output1 = run_fdbcli_command('suspend')
@@ -200,7 +211,7 @@ def suspend(logger):
     assert len(pinfo) == 1
     pid = pinfo[0].split(' ')[0]
     logger.debug("Pid: {}".format(pid))
-    process = subprocess.Popen(command_template[:-1], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    process = subprocess.Popen(command_template[:-1], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=fdbcli_env)
     # suspend the process for a long enough time
     output2, err = process.communicate(input='suspend; suspend 3600 {}\n'.format(address).encode())
     # the cluster should be unavailable after the only process is suspended
@@ -213,7 +224,7 @@ def suspend(logger):
     kill_output = subprocess.check_output(['kill', pid]).decode().strip()
     logger.debug("Kill result: {}".format(kill_output))
     # The process should come back after a while
     duration = 0  # seconds we have already waited
     while not get_value_from_status_json(False, 'client', 'database_status', 'available') and duration < 60:
         logger.debug("Sleep for 1 second to wait cluster recovery")
         time.sleep(1)
@@ -221,6 +232,7 @@ def suspend(logger):
     # at most after 60 seconds, the cluster should be available
     assert get_value_from_status_json(False, 'client', 'database_status', 'available')

+
 def get_value_from_status_json(retry, *args):
     while True:
         result = json.loads(run_fdbcli_command('status', 'json'))

@@ -229,9 +241,10 @@ def get_value_from_status_json(retry, *args):
     for arg in args:
         assert arg in result
         result = result[arg]

     return result

+
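`get_value_from_status_json` walks the nested `status json` document one key at a time, asserting that each key exists before descending. The same walk against a canned fragment (the JSON shape here is illustrative, not a full status document):

```python
import json

def get_value_from_json(doc, *args):
    result = doc
    for arg in args:
        assert arg in result
        result = result[arg]
    return result

status = json.loads('{"client": {"database_status": {"available": true}}}')
print(get_value_from_json(status, 'client', 'database_status', 'available'))  # True
```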
 @enable_logging()
 def consistencycheck(logger):
     consistency_check_on_output = 'ConsistencyCheck is on'
@@ -245,6 +258,7 @@ def consistencycheck(logger):
     output3 = run_fdbcli_command('consistencycheck')
     assert output3 == consistency_check_on_output

+
 @enable_logging()
 def cache_range(logger):
     # this command is currently experimental
@@ -252,6 +266,7 @@ def cache_range(logger):
     run_fdbcli_command('cache_range', 'set', 'a', 'b')
     run_fdbcli_command('cache_range', 'clear', 'a', 'b')

+
 @enable_logging()
 def datadistribution(logger):
     output1 = run_fdbcli_command('datadistribution', 'off')
@@ -271,6 +286,7 @@ def datadistribution(logger):
     assert output6 == 'Data distribution is enabled for rebalance.'
     time.sleep(1)

+
 @enable_logging()
 def transaction(logger):
     """This test will cover the transaction related fdbcli commands.

@@ -280,7 +296,7 @@ def transaction(logger):
     """
     err1 = run_fdbcli_command_and_get_error('set', 'key', 'value')
     assert err1 == 'ERROR: writemode must be enabled to set or clear keys in the database.'
-    process = subprocess.Popen(command_template[:-1], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    process = subprocess.Popen(command_template[:-1], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=fdbcli_env)
     transaction_flow = ['writemode on', 'begin', 'getversion', 'set key value', 'get key', 'commit']
     output1, _ = process.communicate(input='\n'.join(transaction_flow).encode())
     # split the output into lines
@@ -299,13 +315,13 @@ def transaction(logger):
     output2 = run_fdbcli_command('get', 'key')
     assert output2 == "`key' is `value'"
     # test rollback and read-your-write behavior
-    process = subprocess.Popen(command_template[:-1], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    process = subprocess.Popen(command_template[:-1], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=fdbcli_env)
     transaction_flow = [
         'writemode on', 'begin', 'getrange a z',
         'clear key', 'get key',
         # 'option on READ_YOUR_WRITES_DISABLE', 'get key',
         'rollback'
     ]
     output3, _ = process.communicate(input='\n'.join(transaction_flow).encode())
     lines = list(filter(len, output3.decode().split('\n')))[-5:]
     # lines[0] == "Transaction started" and lines[1] == 'Range limited to 25 keys'
@@ -316,13 +332,13 @@ def transaction(logger):
     output4 = run_fdbcli_command('get', 'key')
     assert output4 == "`key' is `value'"
     # test read_your_write_disable option and clear the inserted key
-    process = subprocess.Popen(command_template[:-1], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    process = subprocess.Popen(command_template[:-1], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=fdbcli_env)
     transaction_flow = [
         'writemode on', 'begin',
         'option on READ_YOUR_WRITES_DISABLE',
         'clear key', 'get key',
         'commit'
     ]
     output6, _ = process.communicate(input='\n'.join(transaction_flow).encode())
     lines = list(filter(len, output6.decode().split('\n')))[-4:]
     assert lines[1] == 'Option enabled for current transaction'
@@ -332,15 +348,17 @@ def transaction(logger):
     output7 = run_fdbcli_command('get', 'key')
     assert output7 == "`key': not found"

+
 def get_fdb_process_addresses(logger):
     # get all processes' network addresses
     output = run_fdbcli_command('kill')
     logger.debug(output)
     # except the first line, each line is one process
     addresses = output.split('\n')[1:]
-    assert len(addresses) == process_number
+    assert len(addresses) == args.process_number
     return addresses

+
 @enable_logging(logging.DEBUG)
 def coordinators(logger):
     # we should only have one coordinator for now
@@ -368,6 +386,7 @@ def coordinators(logger):
     assert len(get_value_from_status_json(True, 'client', 'coordinators', 'coordinators')) == 1
     wait_for_database_available(logger)

+
 @enable_logging(logging.DEBUG)
 def exclude(logger):
     # get all processes' network addresses
@@ -380,7 +399,7 @@ def exclude(logger):
     # randomly pick one and exclude the process
     excluded_address = random.choice(addresses)
     # If we see a "not enough space" error, use the FORCE option to proceed;
     # this should be a safe operation as we do not need any storage space for the test
     force = False
     # sometimes we need to retry the exclude
     while True:
@@ -417,6 +436,8 @@ def exclude(logger):
     wait_for_database_available(logger)

+
 # read the system key 'k', need to enable the option first

+
 def read_system_key(k):
     output = run_fdbcli_command('option', 'on', 'READ_SYSTEM_KEYS;', 'get', k)
     if 'is' not in output:
@@ -425,11 +446,14 @@ def read_system_key(k):
     _, value = output.split(' is ')
     return value

+
 @enable_logging()
 def throttle(logger):
     # no throttled tags at the beginning
     no_throttle_tags_output = 'There are no throttled tags'
-    assert run_fdbcli_command('throttle', 'list') == no_throttle_tags_output
+    output = run_fdbcli_command('throttle', 'list')
+    logger.debug(output)
+    assert output == no_throttle_tags_output
     # test 'throttle enable auto'
     run_fdbcli_command('throttle', 'enable', 'auto')
     # verify the change is applied by reading the system key
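The `throttle list` check above now captures the output, logs it, and only then asserts, so a failing run leaves the observed output in the test log. The capture-log-assert pattern in isolation, with a stub standing in for `run_fdbcli_command`:

```python
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('throttle')

def run_stub():
    return 'There are no throttled tags'  # stand-in for run_fdbcli_command('throttle', 'list')

output = run_stub()
logger.debug(output)  # recorded even when the assert below would fail
assert output == 'There are no throttled tags'
```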
@@ -442,6 +466,7 @@ def throttle(logger):
     assert enable_flag == "`0'"
     # TODO : test manual throttling, not easy to do now

+
 def wait_for_database_available(logger):
     # sometimes the change takes some time to have effect and the database can be unavailable at that time
     # this is to wait until the database is available again
@@ -449,17 +474,38 @@ def wait_for_database_available(logger):
         logger.debug("Database unavailable for now, wait for one second")
         time.sleep(1)


 if __name__ == '__main__':
-    # fdbcli_tests.py <path_to_fdbcli_binary> <path_to_fdb_cluster_file> <process_number>
-    assert len(sys.argv) == 4, "Please pass arguments: <path_to_fdbcli_binary> <path_to_fdb_cluster_file> <process_number>"
+    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+                            description="""
+    The test calls fdbcli commands through fdbcli --exec "<command>" interactively using subprocess.
+    The outputs from fdbcli are returned and compared to predefined results.
+    Consequently, changing fdbcli outputs or breaking any commands will cause the test to fail.
+    Commands that are easy to test will run against a single-process cluster.
+    Complex commands like exclude will run against a cluster with multiple (currently set to 5) processes.
+    If external_client_library is given, we will disable the local client and use the external client to run fdbcli.
+    """)
+    parser.add_argument('build_dir', metavar='BUILD_DIRECTORY', help='FDB build directory')
+    parser.add_argument('cluster_file', metavar='CLUSTER_FILE', help='FDB cluster file')
+    parser.add_argument('process_number', nargs='?', metavar='PROCESS_NUMBER', help="Number of fdb processes", type=int, default=1)
+    parser.add_argument('--external-client-library', '-e', metavar='EXTERNAL_CLIENT_LIBRARY_PATH', help="External client library path")
+    args = parser.parse_args()
+
+    # keep current environment variables
+    fdbcli_env = os.environ.copy()
+    # set external client library if provided
+    if args.external_client_library:
+        # disable local client and use the external client library
+        fdbcli_env['FDB_NETWORK_OPTION_DISABLE_LOCAL_CLIENT'] = ''
+        fdbcli_env['FDB_NETWORK_OPTION_EXTERNAL_CLIENT_LIBRARY'] = args.external_client_library
+
     # shell command template
-    command_template = [sys.argv[1], '-C', sys.argv[2], '--exec']
+    command_template = [args.build_dir + '/bin/fdbcli', '-C', args.cluster_file, '--exec']
     # tests for fdbcli commands
     # assertions will fail if fdbcli does not work as expected
-    process_number = int(sys.argv[3])
-    if process_number == 1:
+    if args.process_number == 1:
         # TODO: disabled for now, the change can make the database unavailable
-        #advanceversion()
+        # advanceversion()
         cache_range()
         consistencycheck()
         datadistribution()
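With the argparse rewrite above, positional arguments replace the old `sys.argv` length check, `process_number` becomes optional with a default of 1, and the fdbcli path is derived from the build directory. A sketch of the equivalent parser and the command lines it accepts (the argument values here are examples):

```python
from argparse import ArgumentParser, RawDescriptionHelpFormatter

parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                        description="fdbcli test driver (sketch)")
parser.add_argument('build_dir', metavar='BUILD_DIRECTORY', help='FDB build directory')
parser.add_argument('cluster_file', metavar='CLUSTER_FILE', help='FDB cluster file')
parser.add_argument('process_number', nargs='?', metavar='PROCESS_NUMBER',
                    type=int, default=1, help='Number of fdb processes')
parser.add_argument('--external-client-library', '-e',
                    metavar='EXTERNAL_CLIENT_LIBRARY_PATH',
                    help='External client library path')

# fdbcli_tests.py /build /etc/fdb.cluster            -> single-process run
args = parser.parse_args(['/build', '/etc/fdb.cluster'])
command_template = [args.build_dir + '/bin/fdbcli', '-C', args.cluster_file, '--exec']
print(args.process_number, command_template)  # 1 ['/build/bin/fdbcli', ...]

# fdbcli_tests.py /build /etc/fdb.cluster 5 -e /build/bindings/c/libfdb_c.so
args = parser.parse_args(['/build', '/etc/fdb.cluster', '5',
                          '-e', '/build/bindings/c/libfdb_c.so'])
print(args.process_number, args.external_client_library)
```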
@@ -471,8 +517,6 @@ if __name__ == '__main__':
         transaction()
         throttle()
     else:
-        assert process_number > 1, "Process number should be positive"
+        assert args.process_number > 1, "Process number should be positive"
         coordinators()
         exclude()
-
-
@@ -42,7 +42,7 @@ else()
     set(WITH_TLS OFF)
   endif()
   if(WIN32)
-    message(STATUS "TLS is temporarily disabled on macOS while libressl -> openssl transition happens")
+    message(STATUS "TLS is temporarily disabled on Windows while libressl -> openssl transition happens")
     set(WITH_TLS OFF)
   endif()
 endif()
@@ -63,7 +63,7 @@ Source IP:port 0 string The IP and port of the machine where the s
 Trace ID          1   uint64  The 64-bit identifier of the trace. All spans in a trace share the same trace ID.
 Span ID           2   uint64  The 64-bit identifier of the span. All spans have a unique identifier.
 Start timestamp   3   double  The timestamp when the operation represented by the span began.
-End timestamp     4   double  The timestamp when the operation represented by the span ended.
+Duration          4   double  The duration in seconds of the operation represented by the span.
 Operation name    5   string  The name of the operation the span represents.
 Tags              6   map     User defined tags, added manually to specify additional information.
 Parent span IDs   7   vector  (Optional) A list of span IDs representing parents of this span.
@@ -21,345 +21,16 @@
 #include "fdbcli/fdbcli.actor.h"

 #include "fdbclient/IClientApi.h"
-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "fdbclient/Knobs.h"
 #include "fdbclient/SystemData.h"
 #include "fdbclient/CommitTransaction.h"

 #include "flow/Arena.h"
-#include "flow/FastRef.h"
 #include "flow/ThreadHelper.actor.h"
 #include "flow/genericactors.actor.h"
 #include "flow/actorcompiler.h" // This must be the last #include.

-namespace {
-
-// Helper functions copied from TagThrottle.actor.cpp
-// The only difference is transactions are changed to go through MultiversionTransaction,
-// instead of the native Transaction(i.e., RYWTransaction)
-
-ACTOR Future<bool> getValidAutoEnabled(Reference<ITransaction> tr) {
-    state bool result;
-    loop {
-        Optional<Value> value = wait(safeThreadFutureToFuture(tr->get(tagThrottleAutoEnabledKey)));
-        if (!value.present()) {
-            tr->reset();
-            wait(delay(CLIENT_KNOBS->DEFAULT_BACKOFF));
-            continue;
-        } else if (value.get() == LiteralStringRef("1")) {
-            result = true;
-        } else if (value.get() == LiteralStringRef("0")) {
-            result = false;
-        } else {
-            TraceEvent(SevWarnAlways, "InvalidAutoTagThrottlingValue").detail("Value", value.get());
-            tr->reset();
-            wait(delay(CLIENT_KNOBS->DEFAULT_BACKOFF));
-            continue;
-        }
-        return result;
-    };
-}
-
-ACTOR Future<std::vector<TagThrottleInfo>> getThrottledTags(Reference<IDatabase> db,
-                                                            int limit,
-                                                            bool containsRecommend = false) {
-    state Reference<ITransaction> tr = db->createTransaction();
-    state bool reportAuto = containsRecommend;
-    loop {
-        tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
-        try {
-            if (!containsRecommend) {
-                wait(store(reportAuto, getValidAutoEnabled(tr)));
-            }
-            state ThreadFuture<RangeResult> f = tr->getRange(
-                reportAuto ? tagThrottleKeys : KeyRangeRef(tagThrottleKeysPrefix, tagThrottleAutoKeysPrefix), limit);
-            RangeResult throttles = wait(safeThreadFutureToFuture(f));
-            std::vector<TagThrottleInfo> results;
-            for (auto throttle : throttles) {
-                results.push_back(TagThrottleInfo(TagThrottleKey::fromKey(throttle.key),
-                                                  TagThrottleValue::fromValue(throttle.value)));
-            }
-            return results;
-        } catch (Error& e) {
-            wait(safeThreadFutureToFuture(tr->onError(e)));
-        }
-    }
-}
-
-ACTOR Future<std::vector<TagThrottleInfo>> getRecommendedTags(Reference<IDatabase> db, int limit) {
-    state Reference<ITransaction> tr = db->createTransaction();
-    loop {
-        tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
-        try {
-            bool enableAuto = wait(getValidAutoEnabled(tr));
-            if (enableAuto) {
-                return std::vector<TagThrottleInfo>();
-            }
-            state ThreadFuture<RangeResult> f =
-                tr->getRange(KeyRangeRef(tagThrottleAutoKeysPrefix, tagThrottleKeys.end), limit);
-            RangeResult throttles = wait(safeThreadFutureToFuture(f));
-            std::vector<TagThrottleInfo> results;
-            for (auto throttle : throttles) {
-                results.push_back(TagThrottleInfo(TagThrottleKey::fromKey(throttle.key),
-                                                  TagThrottleValue::fromValue(throttle.value)));
-            }
-            return results;
-        } catch (Error& e) {
-            wait(safeThreadFutureToFuture(tr->onError(e)));
-        }
-    }
-}
-
-ACTOR Future<Void> updateThrottleCount(Reference<ITransaction> tr, int64_t delta) {
-    state ThreadFuture<Optional<Value>> countVal = tr->get(tagThrottleCountKey);
-    state ThreadFuture<Optional<Value>> limitVal = tr->get(tagThrottleLimitKey);
-
-    wait(success(safeThreadFutureToFuture(countVal)) && success(safeThreadFutureToFuture(limitVal)));
-
-    int64_t count = 0;
-    int64_t limit = 0;
-
-    if (countVal.get().present()) {
-        BinaryReader reader(countVal.get().get(), Unversioned());
-        reader >> count;
-    }
-
-    if (limitVal.get().present()) {
-        BinaryReader reader(limitVal.get().get(), Unversioned());
-        reader >> limit;
-    }
-
-    count += delta;
-
-    if (count > limit) {
-        throw too_many_tag_throttles();
-    }
-
-    BinaryWriter writer(Unversioned());
-    writer << count;
-
-    tr->set(tagThrottleCountKey, writer.toValue());
-    return Void();
-}
-
-void signalThrottleChange(Reference<ITransaction> tr) {
-    tr->atomicOp(
-        tagThrottleSignalKey, LiteralStringRef("XXXXXXXXXX\x00\x00\x00\x00"), MutationRef::SetVersionstampedValue);
-}
-
-ACTOR Future<Void> throttleTags(Reference<IDatabase> db,
-                                TagSet tags,
-                                double tpsRate,
-                                double initialDuration,
-                                TagThrottleType throttleType,
-                                TransactionPriority priority,
-                                Optional<double> expirationTime = Optional<double>(),
-                                Optional<TagThrottledReason> reason = Optional<TagThrottledReason>()) {
-    state Reference<ITransaction> tr = db->createTransaction();
-    state Key key = TagThrottleKey(tags, throttleType, priority).toKey();
-
-    ASSERT(initialDuration > 0);
-
-    if (throttleType == TagThrottleType::MANUAL) {
-        reason = TagThrottledReason::MANUAL;
-    }
-    TagThrottleValue throttle(tpsRate,
-                              expirationTime.present() ? expirationTime.get() : 0,
-                              initialDuration,
-                              reason.present() ? reason.get() : TagThrottledReason::UNSET);
-    BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
-    wr << throttle;
-    state Value value = wr.toValue();
-
-    loop {
-        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-        try {
-            if (throttleType == TagThrottleType::MANUAL) {
-                Optional<Value> oldThrottle = wait(safeThreadFutureToFuture(tr->get(key)));
-                if (!oldThrottle.present()) {
-                    wait(updateThrottleCount(tr, 1));
-                }
-            }
-
-            tr->set(key, value);
-
-            if (throttleType == TagThrottleType::MANUAL) {
-                signalThrottleChange(tr);
-            }
-
-            wait(safeThreadFutureToFuture(tr->commit()));
-            return Void();
-        } catch (Error& e) {
-            wait(safeThreadFutureToFuture(tr->onError(e)));
-        }
-    }
-}
-
-ACTOR Future<bool> unthrottleTags(Reference<IDatabase> db,
-                                  TagSet tags,
-                                  Optional<TagThrottleType> throttleType,
-                                  Optional<TransactionPriority> priority) {
-    state Reference<ITransaction> tr = db->createTransaction();
-
-    state std::vector<Key> keys;
-    for (auto p : allTransactionPriorities) {
-        if (!priority.present() || priority.get() == p) {
-            if (!throttleType.present() || throttleType.get() == TagThrottleType::AUTO) {
-                keys.push_back(TagThrottleKey(tags, TagThrottleType::AUTO, p).toKey());
-            }
-            if (!throttleType.present() || throttleType.get() == TagThrottleType::MANUAL) {
-                keys.push_back(TagThrottleKey(tags, TagThrottleType::MANUAL, p).toKey());
-            }
-        }
-    }
-
-    state bool removed = false;
-
-    loop {
-        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-        try {
-            state std::vector<Future<Optional<Value>>> values;
-            values.reserve(keys.size());
-            for (auto key : keys) {
-                values.push_back(safeThreadFutureToFuture(tr->get(key)));
-            }
-
-            wait(waitForAll(values));
-
-            int delta = 0;
-            for (int i = 0; i < values.size(); ++i) {
-                if (values[i].get().present()) {
-                    if (TagThrottleKey::fromKey(keys[i]).throttleType == TagThrottleType::MANUAL) {
-                        delta -= 1;
-                    }
-
-                    tr->clear(keys[i]);
-
-                    // Report that we are removing this tag if we ever see it present.
-                    // This protects us from getting confused if the transaction is maybe committed.
-                    // It's ok if someone else actually ends up removing this tag at the same time
-                    // and we aren't the ones to actually do it.
-                    removed = true;
-                }
-            }
-
-            if (delta != 0) {
-                wait(updateThrottleCount(tr, delta));
-            }
-            if (removed) {
-                signalThrottleChange(tr);
-                wait(safeThreadFutureToFuture(tr->commit()));
-            }
-
-            return removed;
-        } catch (Error& e) {
-            wait(safeThreadFutureToFuture(tr->onError(e)));
-        }
-    }
-}
-
-ACTOR Future<Void> enableAuto(Reference<IDatabase> db, bool enabled) {
-    state Reference<ITransaction> tr = db->createTransaction();
-
-    loop {
-        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-        try {
-            Optional<Value> value = wait(safeThreadFutureToFuture(tr->get(tagThrottleAutoEnabledKey)));
-            if (!value.present() || (enabled && value.get() != LiteralStringRef("1")) ||
-                (!enabled && value.get() != LiteralStringRef("0"))) {
-                tr->set(tagThrottleAutoEnabledKey, LiteralStringRef(enabled ? "1" : "0"));
-                signalThrottleChange(tr);
-
-                wait(safeThreadFutureToFuture(tr->commit()));
-            }
-            return Void();
-        } catch (Error& e) {
-            wait(safeThreadFutureToFuture(tr->onError(e)));
-        }
-    }
-}
-
-ACTOR Future<bool> unthrottleMatchingThrottles(Reference<IDatabase> db,
-                                               KeyRef beginKey,
-                                               KeyRef endKey,
-                                               Optional<TransactionPriority> priority,
-                                               bool onlyExpiredThrottles) {
-    state Reference<ITransaction> tr = db->createTransaction();
-
-    state KeySelector begin = firstGreaterOrEqual(beginKey);
-    state KeySelector end = firstGreaterOrEqual(endKey);
-
-    state bool removed = false;
-
-    loop {
-        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-        try {
-            // holds memory of the RangeResult
-            state ThreadFuture<RangeResult> f = tr->getRange(begin, end, 1000);
-            state RangeResult tags = wait(safeThreadFutureToFuture(f));
-            state uint64_t unthrottledTags = 0;
-            uint64_t manualUnthrottledTags = 0;
-            for (auto tag : tags) {
-                if (onlyExpiredThrottles) {
-                    double expirationTime = TagThrottleValue::fromValue(tag.value).expirationTime;
-                    if (expirationTime == 0 || expirationTime > now()) {
-                        continue;
-                    }
-                }
-
-                TagThrottleKey key = TagThrottleKey::fromKey(tag.key);
-                if (priority.present() && key.priority != priority.get()) {
-                    continue;
-                }
-
-                if (key.throttleType == TagThrottleType::MANUAL) {
-                    ++manualUnthrottledTags;
-                }
-
-                removed = true;
-                tr->clear(tag.key);
-                unthrottledTags++;
-            }
-
-            if (manualUnthrottledTags > 0) {
-                wait(updateThrottleCount(tr, -manualUnthrottledTags));
-            }
-
-            if (unthrottledTags > 0) {
-                signalThrottleChange(tr);
-            }
-
-            wait(safeThreadFutureToFuture(tr->commit()));
-
-            if (!tags.more) {
-                return removed;
-            }
-
-            ASSERT(tags.size() > 0);
-            begin = KeySelector(firstGreaterThan(tags[tags.size() - 1].key), tags.arena());
-        } catch (Error& e) {
-            wait(safeThreadFutureToFuture(tr->onError(e)));
-        }
-    }
-}
-
-Future<bool> unthrottleAll(Reference<IDatabase> db,
-                           Optional<TagThrottleType> tagThrottleType,
-                           Optional<TransactionPriority> priority) {
-    KeyRef begin = tagThrottleKeys.begin;
-    KeyRef end = tagThrottleKeys.end;
-
-    if (tagThrottleType.present() && tagThrottleType == TagThrottleType::AUTO) {
-        begin = tagThrottleAutoKeysPrefix;
-    } else if (tagThrottleType.present() && tagThrottleType == TagThrottleType::MANUAL) {
-        end = tagThrottleAutoKeysPrefix;
-    }
-
-    return unthrottleMatchingThrottles(db, begin, end, priority, false);
-}
-
-} // namespace
-
 namespace fdb_cli {

 ACTOR Future<bool> throttleCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
@@ -403,11 +74,11 @@ ACTOR Future<bool> throttleCommandActor(Reference<IDatabase> db, std::vector<Str

     state std::vector<TagThrottleInfo> tags;
     if (reportThrottled && reportRecommended) {
-        wait(store(tags, getThrottledTags(db, throttleListLimit, true)));
+        wait(store(tags, ThrottleApi::getThrottledTags(db, throttleListLimit, true)));
     } else if (reportThrottled) {
-        wait(store(tags, getThrottledTags(db, throttleListLimit)));
+        wait(store(tags, ThrottleApi::getThrottledTags(db, throttleListLimit)));
     } else if (reportRecommended) {
-        wait(store(tags, getRecommendedTags(db, throttleListLimit)));
+        wait(store(tags, ThrottleApi::getRecommendedTags(db, throttleListLimit)));
     }

     bool anyLogged = false;
@@ -509,7 +180,7 @@ ACTOR Future<bool> throttleCommandActor(Reference<IDatabase> db, std::vector<Str
         TagSet tags;
         tags.addTag(tokens[3]);

-        wait(throttleTags(db, tags, tpsRate, duration, TagThrottleType::MANUAL, priority));
+        wait(ThrottleApi::throttleTags(db, tags, tpsRate, duration, TagThrottleType::MANUAL, priority));
         printf("Tag `%s' has been throttled\n", tokens[3].toString().c_str());
     } else if (tokencmp(tokens[1], "off")) {
         int nextIndex = 2;
@@ -586,7 +257,7 @@ ACTOR Future<bool> throttleCommandActor(Reference<IDatabase> db, std::vector<Str
         priority.present() ? format(" at %s priority", transactionPriorityToString(priority.get(), false)) : "";

     if (tags.size() > 0) {
-        bool success = wait(unthrottleTags(db, tags, throttleType, priority));
+        bool success = wait(ThrottleApi::unthrottleTags(db, tags, throttleType, priority));
         if (success) {
             printf("Unthrottled tag `%s'%s\n", tokens[3].toString().c_str(), priorityString.c_str());
         } else {
@@ -596,7 +267,7 @@ ACTOR Future<bool> throttleCommandActor(Reference<IDatabase> db, std::vector<Str
             priorityString.c_str());
         }
     } else {
-        bool unthrottled = wait(unthrottleAll(db, throttleType, priority));
+        bool unthrottled = wait(ThrottleApi::unthrottleAll(db, throttleType, priority));
         if (unthrottled) {
             printf("Unthrottled all %sthrottled tags%s\n", throttleTypeString, priorityString.c_str());
         } else {
@@ -626,7 +297,7 @@ ACTOR Future<bool> throttleCommandActor(Reference<IDatabase> db, std::vector<Str
         return false;
     }
     state bool autoTagThrottlingEnabled = tokencmp(tokens[1], "enable");
-    wait(enableAuto(db, autoTagThrottlingEnabled));
+    wait(ThrottleApi::enableAuto(db, autoTagThrottlingEnabled));
     printf("Automatic tag throttling has been %s\n", autoTagThrottlingEnabled ? "enabled" : "disabled");
 } else {
     printUsage(tokens[0]);
@@ -36,7 +36,7 @@
 #include "fdbclient/Schemas.h"
 #include "fdbclient/CoordinationInterface.h"
 #include "fdbclient/FDBOptions.g.h"
-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "fdbclient/Tuple.h"

 #include "fdbclient/ThreadSafeTransaction.h"
@@ -3195,10 +3195,10 @@ ACTOR template <class T>
 Future<T> stopNetworkAfter(Future<T> what) {
     try {
         T t = wait(what);
-        g_network->stop();
+        API->stopNetwork();
         return t;
     } catch (...) {
-        g_network->stop();
+        API->stopNetwork();
         throw;
     }
 }
@@ -4685,7 +4685,7 @@ int main(int argc, char** argv) {
     Future<int> cliFuture = runCli(opt);
     Future<Void> timeoutFuture = opt.exit_timeout ? timeExit(opt.exit_timeout) : Never();
     auto f = stopNetworkAfter(success(cliFuture) || timeoutFuture);
-    runNetwork();
+    API->runNetwork();

     if (cliFuture.isReady()) {
         return cliFuture.get();
@@ -113,7 +113,7 @@ set(FDBCLIENT_SRCS
   SystemData.cpp
   SystemData.h
   TagThrottle.actor.cpp
-  TagThrottle.h
+  TagThrottle.actor.h
   TaskBucket.actor.cpp
   TaskBucket.h
   TestKnobCollection.cpp
@@ -28,7 +28,7 @@
 #include "fdbclient/FDBTypes.h"
 #include "fdbclient/StorageServerInterface.h"
 #include "fdbclient/CommitTransaction.h"
-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "fdbclient/GlobalConfig.h"

 #include "fdbrpc/Stats.h"
@@ -437,6 +437,10 @@ public:
     // Requests to the storage server will no longer be duplicated to its pair TSS.
     void removeTssMapping(StorageServerInterface const& ssi);

+    // used in template functions to create a transaction
+    using TransactionT = ReadYourWritesTransaction;
+    Reference<TransactionT> createTransaction();
+
 private:
     std::unordered_map<KeyRef, Reference<WatchMetadata>> watchMap;
 };
@@ -41,8 +41,8 @@ typedef UID SpanID;
 enum {
     tagLocalitySpecial = -1, // tag with this locality means it is invalidTag (id=0), txsTag (id=1), or cacheTag (id=2)
     tagLocalityLogRouter = -2,
-    tagLocalityRemoteLog = -3, // tag created by log router for remote tLogs
-    tagLocalityUpgraded = -4,
+    tagLocalityRemoteLog = -3, // tag created by log router for remote (aka. not in Primary DC) tLogs
+    tagLocalityUpgraded = -4, // tlogs with old log format
     tagLocalitySatellite = -5,
     tagLocalityLogRouterMapped = -6, // The pseudo tag used by log routers to pop the real LogRouter tag (i.e., -2)
     tagLocalityTxs = -7,
@@ -88,6 +88,9 @@ public:

     virtual void addref() = 0;
     virtual void delref() = 0;

+    // used in template functions as returned Future type
+    template <class Type> using FutureT = ThreadFuture<Type>;
+
 };

 // An interface that represents a connection to a cluster made by a client
@@ -115,6 +118,9 @@ public:
     virtual ThreadFuture<Void> forceRecoveryWithDataLoss(const StringRef& dcid) = 0;
     // Management API, create snapshot
     virtual ThreadFuture<Void> createSnapshot(const StringRef& uid, const StringRef& snapshot_command) = 0;

+    // used in template functions as the Transaction type that can be created through createTransaction()
+    using TransactionT = ITransaction;
+
 };

 // An interface that presents the top-level FDB client API as exposed through the C bindings
@@ -373,7 +373,8 @@ void traceTSSErrors(const char* name, UID tssId, const std::unordered_map<int, u
 ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
     state double lastLogged = 0;
     loop {
         wait(delay(CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace));

         TraceEvent ev("TransactionMetrics", cx->dbId);
+
         ev.detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged)
@@ -384,6 +385,7 @@ ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {

         cx->cc.logToTraceEvent(ev);

+        ev.detail("LocationCacheEntryCount", cx->locationCache.size());
         ev.detail("MeanLatency", cx->latencies.mean())
             .detail("MedianLatency", cx->latencies.median())
             .detail("Latency90", cx->latencies.percentile(0.90))
@@ -6562,3 +6564,7 @@ ACTOR Future<Void> setPerpetualStorageWiggle(Database cx, bool enable, LockAware
     }
     return Void();
 }
+
+Reference<DatabaseContext::TransactionT> DatabaseContext::createTransaction() {
+    return makeReference<ReadYourWritesTransaction>(Database(Reference<DatabaseContext>::addRef(this)));
+}
@@ -106,6 +106,7 @@ public:
     inline DatabaseContext* getPtr() const { return db.getPtr(); }
     inline DatabaseContext* extractPtr() { return db.extractPtr(); }
     DatabaseContext* operator->() const { return db.getPtr(); }
+    Reference<DatabaseContext> getReference() const { return db; }

     const UniqueOrderedOptionList<FDBTransactionOptions>& getTransactionDefaults() const;

@@ -22,8 +22,6 @@
 #include "fdbclient/PaxosConfigTransaction.h"
 #include "flow/actorcompiler.h" // must be last include

-namespace {
-
 // TODO: Some replicas may reply after quorum has already been achieved, and we may want to add them to the readReplicas
 // list
 class GetGenerationQuorum {
@@ -72,8 +70,6 @@ public:
     Optional<Version> getLastSeenLiveVersion() const { return lastSeenLiveVersion; }
 };

-} // namespace
-
 class PaxosConfigTransactionImpl {
     ConfigTransactionCommitRequest toCommit;
     Future<GetGenerationQuorum::Result> getGenerationFuture;
@@ -175,6 +175,9 @@ public:
     void setSpecialKeySpaceErrorMsg(const std::string& msg) { specialKeySpaceErrorMsg = msg; }
     Transaction& getTransaction() { return tr; }

+    // used in template functions as returned Future type
+    template<typename Type> using FutureT = Future<Type>;
+
 private:
     friend class RYWImpl;
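Why `FutureT` exists (my illustration, not part of the patch): template code written against a generic transaction type cannot hard-code `Future<T>` versus `ThreadFuture<T>`, so it names the alias instead, exactly as the `ThrottleApi` templates later in this diff do.

// Sketch under the assumption that Tr provides FutureT (as ReadYourWritesTransaction
// now does, and as ITransaction does for the multi-version client), and that
// safeThreadFutureToFuture() is an identity adapter for plain Future<T>.
template <class Tr>
Future<Optional<Value>> readKey(Reference<Tr> tr, Key key) {
    typename Tr::template FutureT<Optional<Value>> f = tr->get(key);
    return safeThreadFutureToFuture(f); // thread-hop only for ThreadFuture<T>
}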
@@ -64,6 +64,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
     init( TLOG_MESSAGE_BLOCK_BYTES, 10e6 );
     init( TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR, double(TLOG_MESSAGE_BLOCK_BYTES) / (TLOG_MESSAGE_BLOCK_BYTES - MAX_MESSAGE_SIZE) ); //1.0121466709838096006362758832473
     init( PEEK_TRACKER_EXPIRATION_TIME, 600 ); if( randomize && BUGGIFY ) PEEK_TRACKER_EXPIRATION_TIME = deterministicRandom()->coinflip() ? 0.1 : 120;
+    init( PEEK_USING_STREAMING, true );
     init( PARALLEL_GET_MORE_REQUESTS, 32 ); if( randomize && BUGGIFY ) PARALLEL_GET_MORE_REQUESTS = 2;
     init( MULTI_CURSOR_PRE_FETCH_LIMIT, 10 );
     init( MAX_QUEUE_COMMIT_BYTES, 15e6 ); if( randomize && BUGGIFY ) MAX_QUEUE_COMMIT_BYTES = 5000;

@@ -627,6 +628,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
     init( FETCH_KEYS_TOO_LONG_TIME_CRITERIA, 300.0 );
     init( MAX_STORAGE_COMMIT_TIME, 120.0 ); //The max fsync stall time on the storage server and tlog before marking a disk as failed
     init( RANGESTREAM_LIMIT_BYTES, 2e6 ); if( randomize && BUGGIFY ) RANGESTREAM_LIMIT_BYTES = 1;
+    init( ENABLE_CLEAR_RANGE_EAGER_READS, true );

     //Wait Failure
     init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
@@ -41,6 +41,7 @@ public:
     // often, so that versions always advance smoothly

     // TLogs
+    bool PEEK_USING_STREAMING;
     double TLOG_TIMEOUT; // tlog OR commit proxy failure - master's reaction time
     double TLOG_SLOW_REJOIN_WARN_TIMEOUT_SECS; // Warns if a tlog takes too long to rejoin
     double RECOVERY_TLOG_SMART_QUORUM_DELAY; // smaller might be better for bug amplification

@@ -563,6 +564,7 @@ public:
     double FETCH_KEYS_TOO_LONG_TIME_CRITERIA;
     double MAX_STORAGE_COMMIT_TIME;
     int64_t RANGESTREAM_LIMIT_BYTES;
+    bool ENABLE_CLEAR_RANGE_EAGER_READS;

     // Wait Failure
     int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
@@ -1964,16 +1964,29 @@ void parse(StringRef& val, WaitState& w) {

 void parse(StringRef& val, time_t& t) {
     struct tm tm = { 0 };
+#ifdef _WIN32
+    std::istringstream s(val.toString());
+    s.imbue(std::locale(setlocale(LC_TIME, nullptr)));
+    s >> std::get_time(&tm, "%FT%T%z");
+    if (s.fail()) {
+        throw std::invalid_argument("failed to parse ISO 8601 datetime");
+    }
+    long timezone;
+    if (_get_timezone(&timezone) != 0) {
+        throw std::runtime_error("failed to convert ISO 8601 datetime");
+    }
+    timezone = -timezone;
+#else
     if (strptime(val.toString().c_str(), "%FT%T%z", &tm) == nullptr) {
         throw std::invalid_argument("failed to parse ISO 8601 datetime");
     }

     long timezone = tm.tm_gmtoff;
     t = timegm(&tm);
     if (t == -1) {
         throw std::runtime_error("failed to convert ISO 8601 datetime");
     }
     t -= timezone;
+#endif
 }

 void parse(StringRef& val, NetworkAddress& a) {
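As an aside (my sketch, not part of the patch), the POSIX branch's arithmetic is easy to check in isolation: `strptime` records the `+hhmm` zone offset in the glibc extension field `tm_gmtoff`, `timegm` interprets the broken-down fields as if they were UTC, and subtracting the offset yields the true epoch time.

// Standalone sketch; assumes glibc (strptime's %z fills tm_gmtoff, and timegm
// is available). Compile as C++.
#include <time.h>
#include <stdio.h>

int main() {
    struct tm tm = { 0 };
    if (strptime("2021-07-13T12:00:00+0200", "%FT%T%z", &tm) == nullptr)
        return 1;
    long offset = tm.tm_gmtoff;      // 7200 seconds east of UTC
    time_t t = timegm(&tm) - offset; // fields-as-UTC minus the offset
    printf("%lld\n", (long long)t);  // 1626170400, i.e. 10:00:00 UTC
    return 0;
}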
@@ -30,7 +30,7 @@
 #include "fdbrpc/Stats.h"
 #include "fdbrpc/TimedRequest.h"
 #include "fdbrpc/TSSComparison.h"
-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "flow/UnitTest.h"

 // Dead code, removed in the next protocol version
@@ -18,7 +18,7 @@
  * limitations under the License.
  */

-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "fdbclient/CommitProxyInterface.h"
 #include "fdbclient/DatabaseContext.h"
@@ -110,319 +110,3 @@ TagThrottleValue TagThrottleValue::fromValue(const ValueRef& value) {
     reader >> throttleValue;
     return throttleValue;
 }
-
-namespace ThrottleApi {
-ACTOR Future<bool> getValidAutoEnabled(Transaction* tr, Database db) {
-    state bool result;
-    loop {
-        Optional<Value> value = wait(tr->get(tagThrottleAutoEnabledKey));
-        if (!value.present()) {
-            tr->reset();
-            wait(delay(CLIENT_KNOBS->DEFAULT_BACKOFF));
-            continue;
-        } else if (value.get() == LiteralStringRef("1")) {
-            result = true;
-        } else if (value.get() == LiteralStringRef("0")) {
-            result = false;
-        } else {
-            TraceEvent(SevWarnAlways, "InvalidAutoTagThrottlingValue", db->dbId).detail("Value", value.get());
-            tr->reset();
-            wait(delay(CLIENT_KNOBS->DEFAULT_BACKOFF));
-            continue;
-        }
-        return result;
-    };
-}
-
-void signalThrottleChange(Transaction& tr) {
-    tr.atomicOp(
-        tagThrottleSignalKey, LiteralStringRef("XXXXXXXXXX\x00\x00\x00\x00"), MutationRef::SetVersionstampedValue);
-}
-
-ACTOR Future<Void> updateThrottleCount(Transaction* tr, int64_t delta) {
-    state Future<Optional<Value>> countVal = tr->get(tagThrottleCountKey);
-    state Future<Optional<Value>> limitVal = tr->get(tagThrottleLimitKey);
-
-    wait(success(countVal) && success(limitVal));
-
-    int64_t count = 0;
-    int64_t limit = 0;
-
-    if (countVal.get().present()) {
-        BinaryReader reader(countVal.get().get(), Unversioned());
-        reader >> count;
-    }
-
-    if (limitVal.get().present()) {
-        BinaryReader reader(limitVal.get().get(), Unversioned());
-        reader >> limit;
-    }
-
-    count += delta;
-
-    if (count > limit) {
-        throw too_many_tag_throttles();
-    }
-
-    BinaryWriter writer(Unversioned());
-    writer << count;
-
-    tr->set(tagThrottleCountKey, writer.toValue());
-    return Void();
-}
-
-ACTOR Future<std::vector<TagThrottleInfo>> getThrottledTags(Database db, int limit, bool containsRecommend) {
-    state Transaction tr(db);
-    state bool reportAuto = containsRecommend;
-    loop {
-        try {
-            if (!containsRecommend) {
-                wait(store(reportAuto, getValidAutoEnabled(&tr, db)));
-            }
-            RangeResult throttles = wait(tr.getRange(
-                reportAuto ? tagThrottleKeys : KeyRangeRef(tagThrottleKeysPrefix, tagThrottleAutoKeysPrefix), limit));
-            std::vector<TagThrottleInfo> results;
-            for (auto throttle : throttles) {
-                results.push_back(TagThrottleInfo(TagThrottleKey::fromKey(throttle.key),
-                                                  TagThrottleValue::fromValue(throttle.value)));
-            }
-            return results;
-        } catch (Error& e) {
-            wait(tr.onError(e));
-        }
-    }
-}
-
-ACTOR Future<std::vector<TagThrottleInfo>> getRecommendedTags(Database db, int limit) {
-    state Transaction tr(db);
-    loop {
-        try {
-            bool enableAuto = wait(getValidAutoEnabled(&tr, db));
-            if (enableAuto) {
-                return std::vector<TagThrottleInfo>();
-            }
-
-            RangeResult throttles =
-                wait(tr.getRange(KeyRangeRef(tagThrottleAutoKeysPrefix, tagThrottleKeys.end), limit));
-            std::vector<TagThrottleInfo> results;
-            for (auto throttle : throttles) {
-                results.push_back(TagThrottleInfo(TagThrottleKey::fromKey(throttle.key),
-                                                  TagThrottleValue::fromValue(throttle.value)));
-            }
-            return results;
-        } catch (Error& e) {
-            wait(tr.onError(e));
-        }
-    }
-}
-
-ACTOR Future<Void> throttleTags(Database db,
-                                TagSet tags,
-                                double tpsRate,
-                                double initialDuration,
-                                TagThrottleType throttleType,
-                                TransactionPriority priority,
-                                Optional<double> expirationTime,
-                                Optional<TagThrottledReason> reason) {
-    state Transaction tr(db);
-    state Key key = TagThrottleKey(tags, throttleType, priority).toKey();
-
-    ASSERT(initialDuration > 0);
-
-    if (throttleType == TagThrottleType::MANUAL) {
-        reason = TagThrottledReason::MANUAL;
-    }
-    TagThrottleValue throttle(tpsRate,
-                              expirationTime.present() ? expirationTime.get() : 0,
-                              initialDuration,
-                              reason.present() ? reason.get() : TagThrottledReason::UNSET);
-    BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
-    wr << throttle;
-    state Value value = wr.toValue();
-
-    loop {
-        try {
-            if (throttleType == TagThrottleType::MANUAL) {
-                Optional<Value> oldThrottle = wait(tr.get(key));
-                if (!oldThrottle.present()) {
-                    wait(updateThrottleCount(&tr, 1));
-                }
-            }
-
-            tr.set(key, value);
-
-            if (throttleType == TagThrottleType::MANUAL) {
-                signalThrottleChange(tr);
-            }
-
-            wait(tr.commit());
-            return Void();
-        } catch (Error& e) {
-            wait(tr.onError(e));
-        }
-    }
-}
-
-ACTOR Future<bool> unthrottleTags(Database db,
-                                  TagSet tags,
-                                  Optional<TagThrottleType> throttleType,
-                                  Optional<TransactionPriority> priority) {
-    state Transaction tr(db);
-
-    state std::vector<Key> keys;
-    for (auto p : allTransactionPriorities) {
-        if (!priority.present() || priority.get() == p) {
-            if (!throttleType.present() || throttleType.get() == TagThrottleType::AUTO) {
-                keys.push_back(TagThrottleKey(tags, TagThrottleType::AUTO, p).toKey());
-            }
-            if (!throttleType.present() || throttleType.get() == TagThrottleType::MANUAL) {
-                keys.push_back(TagThrottleKey(tags, TagThrottleType::MANUAL, p).toKey());
-            }
-        }
-    }
-
-    state bool removed = false;
-
-    loop {
-        try {
-            state std::vector<Future<Optional<Value>>> values;
-            values.reserve(keys.size());
-            for (auto key : keys) {
-                values.push_back(tr.get(key));
-            }
-
-            wait(waitForAll(values));
-
-            int delta = 0;
-            for (int i = 0; i < values.size(); ++i) {
-                if (values[i].get().present()) {
-                    if (TagThrottleKey::fromKey(keys[i]).throttleType == TagThrottleType::MANUAL) {
-                        delta -= 1;
-                    }
-
-                    tr.clear(keys[i]);
-
-                    // Report that we are removing this tag if we ever see it present.
-                    // This protects us from getting confused if the transaction is maybe committed.
-                    // It's ok if someone else actually ends up removing this tag at the same time
-                    // and we aren't the ones to actually do it.
-                    removed = true;
-                }
-            }
-
-            if (delta != 0) {
-                wait(updateThrottleCount(&tr, delta));
-            }
-            if (removed) {
-                signalThrottleChange(tr);
-                wait(tr.commit());
-            }
-
-            return removed;
-        } catch (Error& e) {
-            wait(tr.onError(e));
-        }
-    }
-}
-
-ACTOR Future<bool> unthrottleMatchingThrottles(Database db,
-                                               KeyRef beginKey,
-                                               KeyRef endKey,
-                                               Optional<TransactionPriority> priority,
-                                               bool onlyExpiredThrottles) {
-    state Transaction tr(db);
-
-    state KeySelector begin = firstGreaterOrEqual(beginKey);
-    state KeySelector end = firstGreaterOrEqual(endKey);
-
-    state bool removed = false;
-
-    loop {
-        try {
-            state RangeResult tags = wait(tr.getRange(begin, end, 1000));
-            state uint64_t unthrottledTags = 0;
-            uint64_t manualUnthrottledTags = 0;
-            for (auto tag : tags) {
-                if (onlyExpiredThrottles) {
-                    double expirationTime = TagThrottleValue::fromValue(tag.value).expirationTime;
-                    if (expirationTime == 0 || expirationTime > now()) {
-                        continue;
-                    }
-                }
-
-                TagThrottleKey key = TagThrottleKey::fromKey(tag.key);
-                if (priority.present() && key.priority != priority.get()) {
-                    continue;
-                }
-
-                if (key.throttleType == TagThrottleType::MANUAL) {
-                    ++manualUnthrottledTags;
-                }
-
-                removed = true;
-                tr.clear(tag.key);
-                unthrottledTags++;
-            }
-
-            if (manualUnthrottledTags > 0) {
-                wait(updateThrottleCount(&tr, -manualUnthrottledTags));
-            }
-
-            if (unthrottledTags > 0) {
-                signalThrottleChange(tr);
-            }
-
-            wait(tr.commit());
-
-            if (!tags.more) {
-                return removed;
-            }
-
-            ASSERT(tags.size() > 0);
-            begin = KeySelector(firstGreaterThan(tags[tags.size() - 1].key), tags.arena());
-        } catch (Error& e) {
-            wait(tr.onError(e));
-        }
-    }
-}
-
-Future<bool> unthrottleAll(Database db,
-                           Optional<TagThrottleType> tagThrottleType,
-                           Optional<TransactionPriority> priority) {
-    KeyRef begin = tagThrottleKeys.begin;
-    KeyRef end = tagThrottleKeys.end;
-
-    if (tagThrottleType.present() && tagThrottleType == TagThrottleType::AUTO) {
-        begin = tagThrottleAutoKeysPrefix;
-    } else if (tagThrottleType.present() && tagThrottleType == TagThrottleType::MANUAL) {
-        end = tagThrottleAutoKeysPrefix;
-    }
-
-    return unthrottleMatchingThrottles(db, begin, end, priority, false);
-}
-
-Future<bool> expire(Database db) {
-    return unthrottleMatchingThrottles(
-        db, tagThrottleKeys.begin, tagThrottleKeys.end, Optional<TransactionPriority>(), true);
-}
-
-ACTOR Future<Void> enableAuto(Database db, bool enabled) {
-    state Transaction tr(db);
-
-    loop {
-        try {
-            Optional<Value> value = wait(tr.get(tagThrottleAutoEnabledKey));
-            if (!value.present() || (enabled && value.get() != LiteralStringRef("1")) ||
-                (!enabled && value.get() != LiteralStringRef("0"))) {
-                tr.set(tagThrottleAutoEnabledKey, LiteralStringRef(enabled ? "1" : "0"));
-                signalThrottleChange(tr);
-
-                wait(tr.commit());
-            }
-            return Void();
-        } catch (Error& e) {
-            wait(tr.onError(e));
-        }
-    }
-}
-} // namespace ThrottleApi
@@ -0,0 +1,592 @@
+/*
+ * TagThrottle.actor.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_TAG_THROTTLE_ACTOR_G_H)
+#define FDBCLIENT_TAG_THROTTLE_ACTOR_G_H
+#include "fdbclient/TagThrottle.actor.g.h"
+#elif !defined(FDBCLIENT_TAG_THROTTLE_ACTOR_H)
+#define FDBCLIENT_TAG_THROTTLE_ACTOR_H
+
+#pragma once
+
+#include "flow/Error.h"
+#include "flow/flow.h"
+#include "flow/network.h"
+#include "flow/ThreadHelper.actor.h"
+#include "fdbclient/FDBOptions.g.h"
+#include "fdbclient/FDBTypes.h"
+#include "fdbclient/CommitTransaction.h"
+#include "flow/actorcompiler.h" // This must be the last #include.
+
+typedef StringRef TransactionTagRef;
+typedef Standalone<TransactionTagRef> TransactionTag;
+
+class TagSet {
+public:
+    typedef std::vector<TransactionTagRef>::const_iterator const_iterator;
+
+    TagSet() : bytes(0) {}
+
+    void addTag(TransactionTagRef tag);
+    size_t size() const;
+
+    const_iterator begin() const { return tags.begin(); }
+    const_iterator end() const { return tags.end(); }
+
+    void clear() {
+        tags.clear();
+        bytes = 0;
+    }
+
+    template <class Context>
+    void save(uint8_t* out, Context& c) const {
+        uint8_t* start = out;
+        for (const auto& tag : *this) {
+            *(out++) = (uint8_t)tag.size();
+            std::copy(tag.begin(), tag.end(), out);
+            out += tag.size();
+        }
+        ASSERT((size_t)(out - start) == size() + bytes);
+    }
+
+    template <class Context>
+    void load(const uint8_t* data, size_t size, Context& context) {
+        // const uint8_t *start = data;
+        const uint8_t* end = data + size;
+        while (data < end) {
+            uint8_t len = *(data++);
+            // Tags are already deduplicated
+            const auto& tag = tags.emplace_back(context.tryReadZeroCopy(data, len), len);
+            data += len;
+            bytes += tag.size();
+        }
+        ASSERT(data == end);
+
+        // Deserialized tag sets share the arena with the request that contained them
+        // For this reason, persisting a TagSet that shares memory with other request
+        // members should be done with caution.
+        arena = context.arena();
+    }
+
+    size_t getBytes() const { return bytes; }
+
+    const Arena& getArena() const { return arena; }
+
+private:
+    size_t bytes;
+    Arena arena;
+    // Currently there are never >= 256 tags, so
+    // std::vector is faster than std::set. This may
+    // change if we allow more tags in the future.
+    std::vector<TransactionTagRef> tags;
+};
+
+template <>
+struct dynamic_size_traits<TagSet> : std::true_type {
+    // May be called multiple times during one serialization
+    template <class Context>
+    static size_t size(const TagSet& t, Context&) {
+        return t.size() + t.getBytes();
+    }
+
+    // Guaranteed to be called only once during serialization
+    template <class Context>
+    static void save(uint8_t* out, const TagSet& t, Context& c) {
+        t.save(out, c);
+    }
+
+    // Context is an arbitrary type that is plumbed by reference throughout the
+    // load call tree.
+    template <class Context>
+    static void load(const uint8_t* data, size_t size, TagSet& t, Context& context) {
+        t.load(data, size, context);
+    }
+};
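Side note (my illustration, not part of the patch): the wire format `TagSet::save()` emits is one length byte per tag followed by the tag bytes, which is exactly why `dynamic_size_traits<TagSet>::size()` returns `t.size() + t.getBytes()`.

// Standalone sketch of the footprint calculation; std::string stands in for
// TransactionTagRef here.
#include <string>
#include <vector>

size_t serializedTagSetSize(const std::vector<std::string>& tags) {
    size_t total = 0;
    for (const auto& tag : tags)
        total += 1 + tag.size(); // 1-byte length prefix + payload
    return total;
}
// serializedTagSetSize({"web", "batch"}) == (1 + 3) + (1 + 5) == 10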
+
+enum class TagThrottleType : uint8_t { MANUAL, AUTO };
+
+enum class TagThrottledReason : uint8_t { UNSET = 0, MANUAL, BUSY_READ, BUSY_WRITE };
+
+struct TagThrottleKey {
+    TagSet tags;
+    TagThrottleType throttleType;
+    TransactionPriority priority;
+
+    TagThrottleKey() : throttleType(TagThrottleType::MANUAL), priority(TransactionPriority::DEFAULT) {}
+    TagThrottleKey(TagSet tags, TagThrottleType throttleType, TransactionPriority priority)
+      : tags(tags), throttleType(throttleType), priority(priority) {}
+
+    Key toKey() const;
+    static TagThrottleKey fromKey(const KeyRef& key);
+};
+
+struct TagThrottleValue {
+    double tpsRate;
+    double expirationTime;
+    double initialDuration;
+    TagThrottledReason reason;
+
+    TagThrottleValue() : tpsRate(0), expirationTime(0), initialDuration(0), reason(TagThrottledReason::UNSET) {}
+    TagThrottleValue(double tpsRate, double expirationTime, double initialDuration, TagThrottledReason reason)
+      : tpsRate(tpsRate), expirationTime(expirationTime), initialDuration(initialDuration), reason(reason) {}
+
+    static TagThrottleValue fromValue(const ValueRef& value);
+
+    // To change this serialization, ProtocolVersion::TagThrottleValue must be updated, and downgrades need to be
+    // considered
+    template <class Ar>
+    void serialize(Ar& ar) {
+        if (ar.protocolVersion().hasTagThrottleValueReason()) {
+            serializer(ar, tpsRate, expirationTime, initialDuration, reason);
+        } else if (ar.protocolVersion().hasTagThrottleValue()) {
+            serializer(ar, tpsRate, expirationTime, initialDuration);
+            if (ar.isDeserializing) {
+                reason = TagThrottledReason::UNSET;
+            }
+        }
+    }
+};
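One concrete way this versioned serialization gets exercised (a sketch of my own; the calls mirror `throttleTags()` later in this header and `fromValue()` in TagThrottle.actor.cpp):

// Within fdbclient, assuming the includes above. IncludeVersion stamps the
// buffer with a protocol version, so serialize() can pick the right field set
// when the value is read back by an older or newer client.
TagThrottleValue v(100.0 /*tpsRate*/, 0 /*expirationTime*/, 60.0 /*initialDuration*/,
                   TagThrottledReason::MANUAL);
BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
wr << v;
Value serialized = wr.toValue();
// Round trip: TagThrottleValue::fromValue(serialized) dispatches on
// hasTagThrottleValueReason() vs. hasTagThrottleValue() and fills `reason`
// with UNSET for pre-reason writers.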
+
+struct TagThrottleInfo {
+    TransactionTag tag;
+    TagThrottleType throttleType;
+    TransactionPriority priority;
+    double tpsRate;
+    double expirationTime;
+    double initialDuration;
+    TagThrottledReason reason;
+
+    TagThrottleInfo(TransactionTag tag,
+                    TagThrottleType throttleType,
+                    TransactionPriority priority,
+                    double tpsRate,
+                    double expirationTime,
+                    double initialDuration,
+                    TagThrottledReason reason = TagThrottledReason::UNSET)
+      : tag(tag), throttleType(throttleType), priority(priority), tpsRate(tpsRate), expirationTime(expirationTime),
+        initialDuration(initialDuration), reason(reason) {}
+
+    TagThrottleInfo(TagThrottleKey key, TagThrottleValue value)
+      : throttleType(key.throttleType), priority(key.priority), tpsRate(value.tpsRate),
+        expirationTime(value.expirationTime), initialDuration(value.initialDuration), reason(value.reason) {
+        ASSERT(key.tags.size() == 1); // Multiple tags per throttle is not currently supported
+        tag = *key.tags.begin();
+    }
+};
+
+struct ClientTagThrottleLimits {
+    double tpsRate;
+    double expiration;
+
+    ClientTagThrottleLimits() : tpsRate(0), expiration(0) {}
+    ClientTagThrottleLimits(double tpsRate, double expiration) : tpsRate(tpsRate), expiration(expiration) {}
+
+    template <class Archive>
+    void serialize(Archive& ar) {
+        // Convert expiration time to a duration to avoid clock differences
+        double duration = 0;
+        if (!ar.isDeserializing) {
+            duration = expiration - now();
+        }
+
+        serializer(ar, tpsRate, duration);
+
+        if (ar.isDeserializing) {
+            expiration = now() + duration;
+        }
+    }
+};
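The duration trick above is worth spelling out (my worked example, not part of the patch): shipping an absolute deadline would bake the sender's clock into the value, while a remaining duration survives clock skew between processes.

// Sketch with the two clocks made explicit; numbers are illustrative.
double senderNow = 1000.0;
double expirationOnSender = senderNow + 60.0;            // deadline on the sender's clock
double durationOnWire = expirationOnSender - senderNow;  // 60.0 is what actually gets serialized

double receiverNow = 995.0;                              // receiver's clock runs 5s behind
double expirationOnReceiver = receiverNow + durationOnWire; // 1055.0, still "60 seconds from now"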
+
+struct ClientTrCommitCostEstimation {
+    int opsCount = 0;
+    uint64_t writeCosts = 0;
+    std::deque<std::pair<int, uint64_t>> clearIdxCosts;
+    uint32_t expensiveCostEstCount = 0;
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, opsCount, writeCosts, clearIdxCosts, expensiveCostEstCount);
+    }
+};
+
+// Keys to view and control tag throttling
+extern const KeyRangeRef tagThrottleKeys;
+extern const KeyRef tagThrottleKeysPrefix;
+extern const KeyRef tagThrottleAutoKeysPrefix;
+extern const KeyRef tagThrottleSignalKey;
+extern const KeyRef tagThrottleAutoEnabledKey;
+extern const KeyRef tagThrottleLimitKey;
+extern const KeyRef tagThrottleCountKey;
+
+namespace ThrottleApi {
+
+// The template functions can be called with Native API like DatabaseContext, Transaction/ReadYourWritesTransaction
+// or using IClientAPI like IDatabase, ITransaction
+
+ACTOR template <class Tr>
+Future<bool> getValidAutoEnabled(Reference<Tr> tr) {
+    state bool result;
+    loop {
+        Optional<Value> value = wait(safeThreadFutureToFuture(tr->get(tagThrottleAutoEnabledKey)));
+        if (!value.present()) {
+            tr->reset();
+            wait(delay(CLIENT_KNOBS->DEFAULT_BACKOFF));
+            continue;
+        } else if (value.get() == LiteralStringRef("1")) {
+            result = true;
+        } else if (value.get() == LiteralStringRef("0")) {
+            result = false;
+        } else {
+            TraceEvent(SevWarnAlways, "InvalidAutoTagThrottlingValue").detail("Value", value.get());
+            tr->reset();
+            wait(delay(CLIENT_KNOBS->DEFAULT_BACKOFF));
+            continue;
+        }
+        return result;
+    };
+}
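As the comment above says, one template body serves both transaction stacks; a hedged usage sketch (the names `nativeTr` and `clientTr` are mine):

// nativeTr : Reference<ReadYourWritesTransaction> (native API)
// clientTr : Reference<ITransaction>              (IClientApi / multi-version client)
// Both instantiate the same actor; safeThreadFutureToFuture() is assumed to be
// an identity adapter for plain Future<T> and a cross-thread adapter for
// ThreadFuture<T>.
Future<bool> a = ThrottleApi::getValidAutoEnabled(nativeTr);
Future<bool> b = ThrottleApi::getValidAutoEnabled(clientTr);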
+
+ACTOR template <class DB>
+Future<std::vector<TagThrottleInfo>> getRecommendedTags(Reference<DB> db, int limit) {
+    state Reference<typename DB::TransactionT> tr = db->createTransaction();
+    loop {
+        tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
+        try {
+            bool enableAuto = wait(getValidAutoEnabled(tr));
+            if (enableAuto) {
+                return std::vector<TagThrottleInfo>();
+            }
+            state typename DB::TransactionT::template FutureT<RangeResult> f =
+                tr->getRange(KeyRangeRef(tagThrottleAutoKeysPrefix, tagThrottleKeys.end), limit);
+            RangeResult throttles = wait(safeThreadFutureToFuture(f));
+            std::vector<TagThrottleInfo> results;
+            for (auto throttle : throttles) {
+                results.push_back(TagThrottleInfo(TagThrottleKey::fromKey(throttle.key),
+                                                  TagThrottleValue::fromValue(throttle.value)));
+            }
+            return results;
+        } catch (Error& e) {
+            wait(safeThreadFutureToFuture(tr->onError(e)));
+        }
+    }
+}
+
+ACTOR template <class DB>
+Future<std::vector<TagThrottleInfo>> getThrottledTags(Reference<DB> db, int limit, bool containsRecommend = false) {
+    state Reference<typename DB::TransactionT> tr = db->createTransaction();
+    state bool reportAuto = containsRecommend;
+    loop {
+        tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
+        try {
+            if (!containsRecommend) {
+                wait(store(reportAuto, getValidAutoEnabled(tr)));
+            }
+            state typename DB::TransactionT::template FutureT<RangeResult> f = tr->getRange(
+                reportAuto ? tagThrottleKeys : KeyRangeRef(tagThrottleKeysPrefix, tagThrottleAutoKeysPrefix), limit);
+            RangeResult throttles = wait(safeThreadFutureToFuture(f));
+            std::vector<TagThrottleInfo> results;
+            for (auto throttle : throttles) {
+                results.push_back(TagThrottleInfo(TagThrottleKey::fromKey(throttle.key),
+                                                  TagThrottleValue::fromValue(throttle.value)));
+            }
+            return results;
+        } catch (Error& e) {
+            wait(safeThreadFutureToFuture(tr->onError(e)));
+        }
+    }
+}
+
+template <class Tr>
+void signalThrottleChange(Reference<Tr> tr) {
+    tr->atomicOp(
+        tagThrottleSignalKey, LiteralStringRef("XXXXXXXXXX\x00\x00\x00\x00"), MutationRef::SetVersionstampedValue);
+}
+
+ACTOR template <class Tr>
+Future<Void> updateThrottleCount(Reference<Tr> tr, int64_t delta) {
+    state typename Tr::template FutureT<Optional<Value>> countVal = tr->get(tagThrottleCountKey);
+    state typename Tr::template FutureT<Optional<Value>> limitVal = tr->get(tagThrottleLimitKey);
+
+    wait(success(safeThreadFutureToFuture(countVal)) && success(safeThreadFutureToFuture(limitVal)));
+
+    int64_t count = 0;
+    int64_t limit = 0;
+
+    if (countVal.get().present()) {
+        BinaryReader reader(countVal.get().get(), Unversioned());
+        reader >> count;
+    }
+
+    if (limitVal.get().present()) {
+        BinaryReader reader(limitVal.get().get(), Unversioned());
+        reader >> limit;
+    }
+
+    count += delta;
+
+    if (count > limit) {
+        throw too_many_tag_throttles();
+    }
+
+    BinaryWriter writer(Unversioned());
+    writer << count;
+
+    tr->set(tagThrottleCountKey, writer.toValue());
+    return Void();
+}
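For reference (my sketch, not part of the patch): the count and limit keys hold a bare integer with no protocol-version header, which is why both sides use `Unversioned()`.

// Within fdbclient; mirrors the read/write pattern in updateThrottleCount().
BinaryWriter writer(Unversioned());
writer << int64_t(42);
Value raw = writer.toValue(); // 8 little-endian bytes, no version prefix

int64_t count = 0;
BinaryReader reader(raw, Unversioned());
reader >> count; // count == 42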
+
+ACTOR template <class DB>
+Future<bool> unthrottleMatchingThrottles(Reference<DB> db,
+                                         KeyRef beginKey,
+                                         KeyRef endKey,
+                                         Optional<TransactionPriority> priority,
+                                         bool onlyExpiredThrottles) {
+    state Reference<typename DB::TransactionT> tr = db->createTransaction();
+
+    state KeySelector begin = firstGreaterOrEqual(beginKey);
+    state KeySelector end = firstGreaterOrEqual(endKey);
+
+    state bool removed = false;
+
+    loop {
+        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+        try {
+            // holds memory of the RangeResult
+            state typename DB::TransactionT::template FutureT<RangeResult> f = tr->getRange(begin, end, 1000);
+            state RangeResult tags = wait(safeThreadFutureToFuture(f));
+            state uint64_t unthrottledTags = 0;
+            uint64_t manualUnthrottledTags = 0;
+            for (auto tag : tags) {
+                if (onlyExpiredThrottles) {
+                    double expirationTime = TagThrottleValue::fromValue(tag.value).expirationTime;
+                    if (expirationTime == 0 || expirationTime > now()) {
+                        continue;
+                    }
+                }
+
+                TagThrottleKey key = TagThrottleKey::fromKey(tag.key);
+                if (priority.present() && key.priority != priority.get()) {
+                    continue;
+                }
+
+                if (key.throttleType == TagThrottleType::MANUAL) {
+                    ++manualUnthrottledTags;
+                }
+
+                removed = true;
+                tr->clear(tag.key);
+                unthrottledTags++;
+            }
+
+            if (manualUnthrottledTags > 0) {
+                wait(updateThrottleCount(tr, -manualUnthrottledTags));
+            }
+
+            if (unthrottledTags > 0) {
+                signalThrottleChange(tr);
+            }
+
+            wait(safeThreadFutureToFuture(tr->commit()));
+
+            if (!tags.more) {
+                return removed;
+            }
+
+            ASSERT(tags.size() > 0);
+            begin = KeySelector(firstGreaterThan(tags[tags.size() - 1].key), tags.arena());
+        } catch (Error& e) {
+            wait(safeThreadFutureToFuture(tr->onError(e)));
+        }
+    }
+}
+
+template <class DB>
+Future<bool> expire(DB db) {
+    return unthrottleMatchingThrottles(
+        db, tagThrottleKeys.begin, tagThrottleKeys.end, Optional<TransactionPriority>(), true);
+}
+
+template <class DB>
+Future<bool> unthrottleAll(Reference<DB> db,
+                           Optional<TagThrottleType> tagThrottleType,
+                           Optional<TransactionPriority> priority) {
+    KeyRef begin = tagThrottleKeys.begin;
+    KeyRef end = tagThrottleKeys.end;
+
+    if (tagThrottleType.present() && tagThrottleType == TagThrottleType::AUTO) {
+        begin = tagThrottleAutoKeysPrefix;
+    } else if (tagThrottleType.present() && tagThrottleType == TagThrottleType::MANUAL) {
+        end = tagThrottleAutoKeysPrefix;
+    }
+
+    return unthrottleMatchingThrottles(db, begin, end, priority, false);
+}
+
+ACTOR template <class DB>
+Future<bool> unthrottleTags(Reference<DB> db,
+                            TagSet tags,
+                            Optional<TagThrottleType> throttleType,
+                            Optional<TransactionPriority> priority) {
+    state Reference<typename DB::TransactionT> tr = db->createTransaction();
+
+    state std::vector<Key> keys;
+    for (auto p : allTransactionPriorities) {
+        if (!priority.present() || priority.get() == p) {
+            if (!throttleType.present() || throttleType.get() == TagThrottleType::AUTO) {
+                keys.push_back(TagThrottleKey(tags, TagThrottleType::AUTO, p).toKey());
+            }
+            if (!throttleType.present() || throttleType.get() == TagThrottleType::MANUAL) {
+                keys.push_back(TagThrottleKey(tags, TagThrottleType::MANUAL, p).toKey());
+            }
+        }
+    }
+
+    state bool removed = false;
+
+    loop {
+        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+        try {
+            state std::vector<Future<Optional<Value>>> values;
+            values.reserve(keys.size());
+            for (auto key : keys) {
+                values.push_back(safeThreadFutureToFuture(tr->get(key)));
+            }
+
+            wait(waitForAll(values));
+
+            int delta = 0;
+            for (int i = 0; i < values.size(); ++i) {
+                if (values[i].get().present()) {
+                    if (TagThrottleKey::fromKey(keys[i]).throttleType == TagThrottleType::MANUAL) {
+                        delta -= 1;
+                    }
+
+                    tr->clear(keys[i]);
+
+                    // Report that we are removing this tag if we ever see it present.
+                    // This protects us from getting confused if the transaction is maybe committed.
+                    // It's ok if someone else actually ends up removing this tag at the same time
+                    // and we aren't the ones to actually do it.
+                    removed = true;
+                }
+            }
+
+            if (delta != 0) {
+                wait(updateThrottleCount(tr, delta));
+            }
+            if (removed) {
+                signalThrottleChange(tr);
+                wait(safeThreadFutureToFuture(tr->commit()));
+            }
+
+            return removed;
+        } catch (Error& e) {
+            wait(safeThreadFutureToFuture(tr->onError(e)));
+        }
+    }
+}
+
+ACTOR template <class DB>
+Future<Void> throttleTags(Reference<DB> db,
+                          TagSet tags,
+                          double tpsRate,
+                          double initialDuration,
+                          TagThrottleType throttleType,
+                          TransactionPriority priority,
+                          Optional<double> expirationTime = Optional<double>(),
+                          Optional<TagThrottledReason> reason = Optional<TagThrottledReason>()) {
+    state Reference<typename DB::TransactionT> tr = db->createTransaction();
+    state Key key = TagThrottleKey(tags, throttleType, priority).toKey();
+
+    ASSERT(initialDuration > 0);
+
+    if (throttleType == TagThrottleType::MANUAL) {
+        reason = TagThrottledReason::MANUAL;
+    }
+    TagThrottleValue throttle(tpsRate,
+                              expirationTime.present() ? expirationTime.get() : 0,
+                              initialDuration,
+                              reason.present() ? reason.get() : TagThrottledReason::UNSET);
+    BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
+    wr << throttle;
+    state Value value = wr.toValue();
+
+    loop {
+        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+        try {
+            if (throttleType == TagThrottleType::MANUAL) {
+                Optional<Value> oldThrottle = wait(safeThreadFutureToFuture(tr->get(key)));
+                if (!oldThrottle.present()) {
+                    wait(updateThrottleCount(tr, 1));
+                }
+            }
+
+            tr->set(key, value);
+
+            if (throttleType == TagThrottleType::MANUAL) {
+                signalThrottleChange(tr);
+            }
+
+            wait(safeThreadFutureToFuture(tr->commit()));
+            return Void();
+        } catch (Error& e) {
+            wait(safeThreadFutureToFuture(tr->onError(e)));
+        }
+    }
+}
+
+ACTOR template <class DB>
+Future<Void> enableAuto(Reference<DB> db, bool enabled) {
+    state Reference<typename DB::TransactionT> tr = db->createTransaction();
+
+    loop {
+        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+        try {
+            Optional<Value> value = wait(safeThreadFutureToFuture(tr->get(tagThrottleAutoEnabledKey)));
+            if (!value.present() || (enabled && value.get() != LiteralStringRef("1")) ||
+                (!enabled && value.get() != LiteralStringRef("0"))) {
+                tr->set(tagThrottleAutoEnabledKey, LiteralStringRef(enabled ? "1" : "0"));
+                signalThrottleChange<typename DB::TransactionT>(tr);
+
+                wait(safeThreadFutureToFuture(tr->commit()));
+            }
+            return Void();
+        } catch (Error& e) {
+            wait(safeThreadFutureToFuture(tr->onError(e)));
+        }
+    }
+}
+
+}; // namespace ThrottleApi
+
+template <class Value>
+using TransactionTagMap = std::unordered_map<TransactionTag, Value, std::hash<TransactionTagRef>>;
+
+template <class Value>
+using PrioritizedTransactionTagMap = std::map<TransactionPriority, TransactionTagMap<Value>>;
+
+template <class Value>
+using UIDTransactionTagMap = std::unordered_map<UID, TransactionTagMap<Value>>;
+
+#include "flow/unactorcompiler.h"
+#endif
@@ -1,265 +0,0 @@
-/*
- * TagThrottle.h
- *
- * This source file is part of the FoundationDB open source project
- *
- * Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef FDBCLIENT_TAG_THROTTLE_H
-#define FDBCLIENT_TAG_THROTTLE_H
-
-#pragma once
-
-#include "flow/Error.h"
-#include "flow/flow.h"
-#include "flow/network.h"
-#include "fdbclient/FDBTypes.h"
-
-#include <set>
-
-class Database;
-
-namespace ThrottleApi {}
-
-typedef StringRef TransactionTagRef;
-typedef Standalone<TransactionTagRef> TransactionTag;
-
-class TagSet {
-public:
-    typedef std::vector<TransactionTagRef>::const_iterator const_iterator;
-
-    TagSet() : bytes(0) {}
-
-    void addTag(TransactionTagRef tag);
-    size_t size() const;
-
-    const_iterator begin() const { return tags.begin(); }
-    const_iterator end() const { return tags.end(); }
-
-    void clear() {
-        tags.clear();
-        bytes = 0;
-    }
-
-    template <class Context>
-    void save(uint8_t* out, Context& c) const {
-        uint8_t* start = out;
-        for (const auto& tag : *this) {
-            *(out++) = (uint8_t)tag.size();
-            std::copy(tag.begin(), tag.end(), out);
-            out += tag.size();
-        }
-        ASSERT((size_t)(out - start) == size() + bytes);
-    }
-
-    template <class Context>
-    void load(const uint8_t* data, size_t size, Context& context) {
-        // const uint8_t *start = data;
-        const uint8_t* end = data + size;
-        while (data < end) {
-            uint8_t len = *(data++);
-            // Tags are already deduplicated
-            const auto& tag = tags.emplace_back(context.tryReadZeroCopy(data, len), len);
-            data += len;
-            bytes += tag.size();
-        }
-        ASSERT(data == end);
-
-        // Deserialized tag sets share the arena with the request that contained them
-        // For this reason, persisting a TagSet that shares memory with other request
-        // members should be done with caution.
-        arena = context.arena();
-    }
-
-    size_t getBytes() const { return bytes; }
-
-    const Arena& getArena() const { return arena; }
-
-private:
-    size_t bytes;
-    Arena arena;
-    // Currently there are never >= 256 tags, so
-    // std::vector is faster than std::set. This may
-    // change if we allow more tags in the future.
-    std::vector<TransactionTagRef> tags;
-};
-
-template <>
-struct dynamic_size_traits<TagSet> : std::true_type {
-    // May be called multiple times during one serialization
-    template <class Context>
-    static size_t size(const TagSet& t, Context&) {
-        return t.size() + t.getBytes();
-    }
-
-    // Guaranteed to be called only once during serialization
-    template <class Context>
-    static void save(uint8_t* out, const TagSet& t, Context& c) {
-        t.save(out, c);
-    }
-
-    // Context is an arbitrary type that is plumbed by reference throughout the
-    // load call tree.
-    template <class Context>
-    static void load(const uint8_t* data, size_t size, TagSet& t, Context& context) {
-        t.load(data, size, context);
-    }
-};
-
-enum class TagThrottleType : uint8_t { MANUAL, AUTO };
-
-enum class TagThrottledReason : uint8_t { UNSET = 0, MANUAL, BUSY_READ, BUSY_WRITE };
-
-struct TagThrottleKey {
-    TagSet tags;
-    TagThrottleType throttleType;
-    TransactionPriority priority;
-
-    TagThrottleKey() : throttleType(TagThrottleType::MANUAL), priority(TransactionPriority::DEFAULT) {}
-    TagThrottleKey(TagSet tags, TagThrottleType throttleType, TransactionPriority priority)
-      : tags(tags), throttleType(throttleType), priority(priority) {}
-
-    Key toKey() const;
-    static TagThrottleKey fromKey(const KeyRef& key);
-};
-
-struct TagThrottleValue {
-    double tpsRate;
-    double expirationTime;
-    double initialDuration;
-    TagThrottledReason reason;
-
-    TagThrottleValue() : tpsRate(0), expirationTime(0), initialDuration(0), reason(TagThrottledReason::UNSET) {}
-    TagThrottleValue(double tpsRate, double expirationTime, double initialDuration, TagThrottledReason reason)
-      : tpsRate(tpsRate), expirationTime(expirationTime), initialDuration(initialDuration), reason(reason) {}
-
-    static TagThrottleValue fromValue(const ValueRef& value);
-
-    // To change this serialization, ProtocolVersion::TagThrottleValue must be updated, and downgrades need to be
-    // considered
-    template <class Ar>
-    void serialize(Ar& ar) {
-        if (ar.protocolVersion().hasTagThrottleValueReason()) {
-            serializer(ar, tpsRate, expirationTime, initialDuration, reason);
-        } else if (ar.protocolVersion().hasTagThrottleValue()) {
-            serializer(ar, tpsRate, expirationTime, initialDuration);
-            if (ar.isDeserializing) {
-                reason = TagThrottledReason::UNSET;
-            }
-        }
-    }
-};
-
-struct TagThrottleInfo {
-    TransactionTag tag;
-    TagThrottleType throttleType;
-    TransactionPriority priority;
-    double tpsRate;
-    double expirationTime;
-    double initialDuration;
-    TagThrottledReason reason;
-
-    TagThrottleInfo(TransactionTag tag,
-                    TagThrottleType throttleType,
-                    TransactionPriority priority,
-                    double tpsRate,
-                    double expirationTime,
-                    double initialDuration,
-                    TagThrottledReason reason = TagThrottledReason::UNSET)
-      : tag(tag), throttleType(throttleType), priority(priority), tpsRate(tpsRate), expirationTime(expirationTime),
-        initialDuration(initialDuration), reason(reason) {}
-
-    TagThrottleInfo(TagThrottleKey key, TagThrottleValue value)
-      : throttleType(key.throttleType), priority(key.priority), tpsRate(value.tpsRate),
-        expirationTime(value.expirationTime), initialDuration(value.initialDuration), reason(value.reason) {
-        ASSERT(key.tags.size() == 1); // Multiple tags per throttle is not currently supported
-        tag = *key.tags.begin();
-    }
-};
-
-struct ClientTagThrottleLimits {
-    double tpsRate;
-    double expiration;
-
-    ClientTagThrottleLimits() : tpsRate(0), expiration(0) {}
-    ClientTagThrottleLimits(double tpsRate, double expiration) : tpsRate(tpsRate), expiration(expiration) {}
-
-    template <class Archive>
-    void serialize(Archive& ar) {
-        // Convert expiration time to a duration to avoid clock differences
-        double duration = 0;
-        if (!ar.isDeserializing) {
-            duration = expiration - now();
-        }
-
-        serializer(ar, tpsRate, duration);
-
-        if (ar.isDeserializing) {
-            expiration = now() + duration;
-        }
-    }
-};
-
-struct ClientTrCommitCostEstimation {
-    int opsCount = 0;
-    uint64_t writeCosts = 0;
-    std::deque<std::pair<int, uint64_t>> clearIdxCosts;
-    uint32_t expensiveCostEstCount = 0;
-    template <class Ar>
-    void serialize(Ar& ar) {
-        serializer(ar, opsCount, writeCosts, clearIdxCosts, expensiveCostEstCount);
-    }
-};
-
-namespace ThrottleApi {
-Future<std::vector<TagThrottleInfo>> getThrottledTags(Database const& db,
-                                                      int const& limit,
-                                                      bool const& containsRecommend = false);
-Future<std::vector<TagThrottleInfo>> getRecommendedTags(Database const& db, int const& limit);
-
-Future<Void> throttleTags(Database const& db,
-                          TagSet const& tags,
-                          double const& tpsRate,
-                          double const& initialDuration,
-                          TagThrottleType const& throttleType,
-                          TransactionPriority const& priority,
-                          Optional<double> const& expirationTime = Optional<double>(),
-                          Optional<TagThrottledReason> const& reason = Optional<TagThrottledReason>());
-
-Future<bool> unthrottleTags(Database const& db,
-                            TagSet const& tags,
-                            Optional<TagThrottleType> const& throttleType,
-                            Optional<TransactionPriority> const& priority);
-
-Future<bool> unthrottleAll(Database db, Optional<TagThrottleType> throttleType, Optional<TransactionPriority> priority);
-Future<bool> expire(Database db);
-
-Future<Void> enableAuto(Database const& db, bool const& enabled);
-}; // namespace ThrottleApi
-
-template <class Value>
-using TransactionTagMap = std::unordered_map<TransactionTag, Value, std::hash<TransactionTagRef>>;
-
-template <class Value>
-using PrioritizedTransactionTagMap = std::map<TransactionPriority, TransactionTagMap<Value>>;
-
-template <class Value>
-using UIDTransactionTagMap = std::unordered_map<UID, TransactionTagMap<Value>>;
-#endif
@@ -158,7 +158,10 @@ const Endpoint& EndpointMap::insert(NetworkAddressList localAddresses,
 NetworkMessageReceiver* EndpointMap::get(Endpoint::Token const& token) {
     uint32_t index = token.second();
     if (index < wellKnownEndpointCount && data[index].receiver == nullptr) {
-        TraceEvent(SevWarnAlways, "WellKnownEndpointNotAdded").detail("Token", token).detail("Index", index).backtrace();
+        TraceEvent(SevWarnAlways, "WellKnownEndpointNotAdded")
+            .detail("Token", token)
+            .detail("Index", index)
+            .backtrace();
     }
     if (index < data.size() && data[index].token().first() == token.first() &&
         ((data[index].token().second() & 0xffffffff00000000LL) | index) == token.second())
@@ -923,6 +926,7 @@ ACTOR static void deliver(TransportData* self,
     // ReadSocket) we can just upgrade. Otherwise we'll context switch so that we don't block other tasks that might run
     // with a higher priority. ReplyPromiseStream needs to guarentee that messages are recieved in the order they were
     // sent, so we are using orderedDelay.
+    // NOTE: don't skip delay(0) when it's local deliver since it could cause out of order object deconstruction.
     if (priority < TaskPriority::ReadSocket || !inReadSocket) {
         wait(orderedDelay(0, priority));
     } else {
@@ -300,7 +300,7 @@ struct AcknowledgementReceiver final : FlowReceiver, FastAllocated<Acknowledgeme
             Promise<Void> hold = ready;
             hold.sendError(message.getError());
         } else {
-            ASSERT(message.get().bytes > bytesAcknowledged);
+            ASSERT(message.get().bytes > bytesAcknowledged || (message.get().bytes < 0 && bytesAcknowledged > 0));
             bytesAcknowledged = message.get().bytes;
             if (ready.isValid() && bytesSent - bytesAcknowledged < bytesLimit) {
                 Promise<Void> hold = ready;
@@ -393,7 +393,8 @@ struct NetNotifiedQueueWithAcknowledgements final : NotifiedQueue<T>,
                 false);
         }
         if (isRemoteEndpoint() && !sentError && !acknowledgements.failures.isReady()) {
-            // The ReplyPromiseStream was cancelled before sending an error, so the storage server must have died
+            // Notify the client ReplyPromiseStream was cancelled before sending an error, so the storage server must
+            // have died
             FlowTransport::transport().sendUnreliable(SerializeSource<ErrorOr<EnsureTable<T>>>(broken_promise()),
                                                       getEndpoint(TaskPriority::ReadSocket),
                                                       false);
@@ -413,6 +414,7 @@ public:
     void send(U&& value) const {
         if (queue->isRemoteEndpoint()) {
             if (!queue->acknowledgements.getRawEndpoint().isValid()) {
+                // register acknowledge receiver on sender and tell the receiver where to send acknowledge messages
                 value.acknowledgeToken = queue->acknowledgements.getEndpoint(TaskPriority::ReadSocket).token;
             }
             queue->acknowledgements.bytesSent += value.expectedSize();
@@ -474,6 +476,8 @@ public:
             errors->delPromiseRef();
         }
     }

+    // The endpoints of a ReplyPromiseStream must be initialized at Task::ReadSocket, because with lower priorities
+    // a delay(0) in FlowTransport deliver can cause out of order delivery.
     const Endpoint& getEndpoint() const { return queue->getEndpoint(TaskPriority::ReadSocket); }

     bool operator==(const ReplyPromiseStream<T>& rhs) const { return queue == rhs.queue; }
@@ -197,7 +197,7 @@ struct PeerHolder {
     }
 };

-// Implements getRepyStream, this a void actor with the same lifetime as the input ReplyPromiseStream.
+// Implements getReplyStream; this is a void actor with the same lifetime as the input ReplyPromiseStream.
 // Because this actor holds a reference to the stream, normally it would be impossible to know when there are no other
 // references. To get around this, there is a SAV inside the stream that has one less promise reference than it should
 // (caused by getErrorFutureAndDelPromiseRef()). When that SAV gets a broken promise because no one besides this void
@@ -1900,7 +1900,7 @@ public:

         KillType ktResult, ktMin = kt;
         for (auto& datacenterMachine : datacenterMachines) {
-            if (deterministicRandom()->random01() < 0.99) {
+            if (deterministicRandom()->random01() < 0.99 || forceKill) {
                 killMachine(datacenterMachine.first, kt, true, &ktResult);
                 if (ktResult != kt) {
                     TraceEvent(SevWarn, "KillDCFail")
@@ -3938,13 +3938,11 @@ ACTOR Future<Void> timeKeeper(ClusterControllerData* self) {
     state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
     loop {
         try {
+            state UID debugID = deterministicRandom()->randomUniqueID();
             if (!g_network->isSimulated()) {
                 // This is done to provide an arbitrary logged transaction every ~10s.
                 // FIXME: replace or augment this with logging on the proxy which tracks
                 // how long it is taking to hear responses from each other component.

-                UID debugID = deterministicRandom()->randomUniqueID();
-                TraceEvent("TimeKeeperCommit", debugID).log();
                 tr->debugTransaction(debugID);
             }
             tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);

@@ -3959,7 +3957,9 @@ ACTOR Future<Void> timeKeeper(ClusterControllerData* self) {
             Version v = tr->getReadVersion().get();
             int64_t currentTime = (int64_t)now();
             versionMap.set(tr, currentTime, v);
+            if (!g_network->isSimulated()) {
+                TraceEvent("TimeKeeperCommit", debugID).detail("Version", v);
+            }
             int64_t ttl = currentTime - SERVER_KNOBS->TIME_KEEPER_DELAY * SERVER_KNOBS->TIME_KEEPER_MAX_ENTRIES;
             if (ttl > 0) {
                 versionMap.erase(tr, 0, ttl);
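
Hoisting debugID into a `state` variable is what lets the second hunk reference it after the transaction's reads complete: in flow, an ACTOR's ordinary locals do not survive across a `wait()`, while `state` variables are moved into the generated actor object and persist between suspension points. An illustrative flow-style sketch of the rule (`example` is a hypothetical actor):

    ACTOR Future<Void> example() {
        state UID id = deterministicRandom()->randomUniqueID(); // survives every wait() below
        double before = now(); // plain local: usable only until the next wait()
        wait(delay(1.0));
        // `before` may not be referenced here (the actor compiler rejects it); `id` is still valid.
        TraceEvent("ExampleDone", id).log();
        return Void();
    }
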
@@ -4858,18 +4858,18 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
             ++self.getWorkersRequests;
             vector<WorkerDetails> workers;

-            for (auto& it : self.id_worker) {
+            for (auto const& [id, worker] : self.id_worker) {
                 if ((req.flags & GetWorkersRequest::NON_EXCLUDED_PROCESSES_ONLY) &&
-                    self.db.config.isExcludedServer(it.second.details.interf.addresses())) {
+                    self.db.config.isExcludedServer(worker.details.interf.addresses())) {
                     continue;
                 }

                 if ((req.flags & GetWorkersRequest::TESTER_CLASS_ONLY) &&
-                    it.second.details.processClass.classType() != ProcessClass::TesterClass) {
+                    worker.details.processClass.classType() != ProcessClass::TesterClass) {
                     continue;
                 }

-                workers.push_back(it.second.details);
+                workers.push_back(worker.details);
             }

             req.reply.send(workers);
@@ -123,11 +123,11 @@ public:
 class ReadFromLocalConfigEnvironment {
     UID id;
     std::string dataDir;
-    LocalConfiguration localConfiguration;
+    Reference<LocalConfiguration> localConfiguration;
     Reference<AsyncVar<ConfigBroadcastInterface> const> cbi;
     Future<Void> consumer;

-    ACTOR static Future<Void> checkEventually(LocalConfiguration const* localConfiguration,
+    ACTOR static Future<Void> checkEventually(Reference<LocalConfiguration const> localConfiguration,
                                               Optional<int64_t> expected) {
         state double lastMismatchTime = now();
         loop {

@@ -145,7 +145,7 @@ class ReadFromLocalConfigEnvironment {
     }

     ACTOR static Future<Void> setup(ReadFromLocalConfigEnvironment* self) {
-        wait(self->localConfiguration.initialize());
+        wait(self->localConfiguration->initialize());
         if (self->cbi) {
             // LocalConfiguration runs in a loop waiting for messages from the
             // broadcaster. These unit tests use the same

@@ -155,7 +155,7 @@ class ReadFromLocalConfigEnvironment {
             // prevents two actors trying to listen for the same message on the
             // same interface, causing lots of issues!
             self->consumer.cancel();
-            self->consumer = self->localConfiguration.consume(self->cbi->get());
+            self->consumer = self->localConfiguration->consume(self->cbi->get());
         }
         return Void();
     }

@@ -164,40 +164,43 @@ public:
     ReadFromLocalConfigEnvironment(std::string const& dataDir,
                                    std::string const& configPath,
                                    std::map<std::string, std::string> const& manualKnobOverrides)
-      : dataDir(dataDir), localConfiguration(dataDir, configPath, manualKnobOverrides, IsTest::True),
+      : dataDir(dataDir),
+        localConfiguration(makeReference<LocalConfiguration>(dataDir, configPath, manualKnobOverrides, IsTest::True)),
         consumer(Never()) {}

     Future<Void> setup() { return setup(this); }

     Future<Void> restartLocalConfig(std::string const& newConfigPath) {
-        localConfiguration = LocalConfiguration(dataDir, newConfigPath, {}, IsTest::True);
+        std::map<std::string, std::string> manualKnobOverrides = {};
+        localConfiguration =
+            makeReference<LocalConfiguration>(dataDir, newConfigPath, manualKnobOverrides, IsTest::True);
         return setup();
     }

     void connectToBroadcaster(Reference<AsyncVar<ConfigBroadcastInterface> const> const& cbi) {
         this->cbi = cbi;
-        consumer = localConfiguration.consume(cbi->get());
+        consumer = localConfiguration->consume(cbi->get());
     }

     void checkImmediate(Optional<int64_t> expected) const {
         if (expected.present()) {
-            ASSERT_EQ(localConfiguration.getTestKnobs().TEST_LONG, expected.get());
+            ASSERT_EQ(localConfiguration->getTestKnobs().TEST_LONG, expected.get());
         } else {
-            ASSERT_EQ(localConfiguration.getTestKnobs().TEST_LONG, 0);
+            ASSERT_EQ(localConfiguration->getTestKnobs().TEST_LONG, 0);
         }
     }

     Future<Void> checkEventually(Optional<int64_t> expected) const {
-        return checkEventually(&localConfiguration, expected);
+        return checkEventually(localConfiguration, expected);
     }

-    LocalConfiguration& getMutableLocalConfiguration() { return localConfiguration; }
+    LocalConfiguration& getMutableLocalConfiguration() { return *localConfiguration; }

     Future<Void> getError() const { return consumer; }

-    Version lastSeenVersion() { return localConfiguration.lastSeenVersion(); }
+    Version lastSeenVersion() { return localConfiguration->lastSeenVersion(); }

-    ConfigClassSet configClassSet() { return localConfiguration.configClassSet(); }
+    ConfigClassSet configClassSet() { return localConfiguration->configClassSet(); }
 };

 class LocalConfigEnvironment {
@@ -858,7 +858,7 @@ ACTOR Future<Void> fetchShardMetrics(DataDistributionTracker* self, GetMetricsRe
         when(wait(delay(SERVER_KNOBS->DD_SHARD_METRICS_TIMEOUT, TaskPriority::DataDistribution))) {
             TEST(true); // DD_SHARD_METRICS_TIMEOUT
             StorageMetrics largeMetrics;
-            largeMetrics.bytes = SERVER_KNOBS->MAX_SHARD_BYTES;
+            largeMetrics.bytes = getMaxShardSize(self->dbSizeEstimate->get());
             req.reply.send(largeMetrics);
         }
     }
@@ -88,6 +88,7 @@ rocksdb::Options getOptions() {
     }

     options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbOpts));
+    options.db_log_dir = SERVER_KNOBS->LOG_DIRECTORY;
     return options;
 }
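
`db_log_dir` is a stock RocksDB option controlling where the engine writes its info LOG files; pointing it at the server's log directory keeps RocksDB's own logs alongside the trace files rather than inside the data directory. A minimal standalone usage sketch (paths are placeholders):

    #include <rocksdb/db.h>
    #include <rocksdb/options.h>

    int main() {
        rocksdb::Options options;
        options.create_if_missing = true;
        // Info LOG / LOG.old.* files go here instead of the DB data directory.
        options.db_log_dir = "/var/log/fdb";

        rocksdb::DB* db = nullptr;
        rocksdb::Status s = rocksdb::DB::Open(options, "/var/lib/fdb/rocksdb-data", &db);
        if (s.ok())
            delete db; // close cleanly
        return s.ok() ? 0 : 1;
    }
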
@@ -427,10 +427,6 @@ LocalConfiguration::LocalConfiguration(std::string const& dataFolder,
                                        IsTest isTest)
   : impl(PImpl<LocalConfigurationImpl>::create(dataFolder, configPath, manualKnobOverrides, isTest)) {}

-LocalConfiguration::LocalConfiguration(LocalConfiguration&&) = default;
-
-LocalConfiguration& LocalConfiguration::operator=(LocalConfiguration&&) = default;
-
 LocalConfiguration::~LocalConfiguration() = default;

 FlowKnobs const& LocalConfiguration::getFlowKnobs() const {
@@ -43,7 +43,7 @@ FDB_DECLARE_BOOLEAN_PARAM(IsTest);
  * - Register with the broadcaster to receive new updates for the relevant configuration classes
  * - Persist these updates when received, and restart if necessary
  */
-class LocalConfiguration {
+class LocalConfiguration : public ReferenceCounted<LocalConfiguration> {
     PImpl<class LocalConfigurationImpl> impl;

 public:

@@ -51,8 +51,6 @@ public:
                        std::string const& configPath,
                        std::map<std::string, std::string> const& manualKnobOverrides,
                        IsTest = IsTest::False);
-    LocalConfiguration(LocalConfiguration&&);
-    LocalConfiguration& operator=(LocalConfiguration&&);
     ~LocalConfiguration();
     FlowKnobs const& getFlowKnobs() const;
     ClientKnobs const& getClientKnobs() const;
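
Replacing move semantics with `ReferenceCounted` shifts ownership from a single movable value to shared heap-allocated handles: every `Reference<LocalConfiguration>` copy bumps an intrusive counter, and the object dies when the last handle does, which is what lets the test environment and its actors hold the same instance safely. A stripped-down model of the intrusive pattern in plain C++ (flow's real `Reference`/`ReferenceCounted` differ in detail):

    #include <cassert>
    #include <utility>

    // Intrusive reference counting: the count lives inside the object itself.
    template <class T>
    struct RefCounted {
        mutable int count = 0;
        void addref() const { ++count; }
        void delref() const {
            if (--count == 0)
                delete static_cast<const T*>(this);
        }
    };

    template <class T>
    class Ref {
        T* ptr = nullptr;

    public:
        Ref() = default;
        explicit Ref(T* p) : ptr(p) { if (ptr) ptr->addref(); }
        Ref(const Ref& o) : ptr(o.ptr) { if (ptr) ptr->addref(); }
        Ref& operator=(Ref o) { std::swap(ptr, o.ptr); return *this; } // copy-and-swap
        ~Ref() { if (ptr) ptr->delref(); }
        T* operator->() const { return ptr; }
    };

    struct Config : RefCounted<Config> {
        long testKnob = 0;
    };

    int main() {
        Ref<Config> a(new Config()); // count == 1
        {
            Ref<Config> b = a; // count == 2; both handles see the same object
            b->testKnob = 7;
        } // b destroyed: count back to 1
        assert(a->testKnob == 7);
    } // a destroyed: count hits 0, object deleted
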
@@ -117,6 +117,7 @@ struct LogRouterData {
         getMoreBlockedCount; // Increase by 1 if data is not available when LR tries to pull data from satellite tLog.
     Future<Void> logger;
     Reference<EventCacheHolder> eventCacheHolder;
+    int activePeekStreams = 0;

     std::vector<Reference<TagData>> tag_data; // we only store data for the remote tag locality

@@ -193,6 +194,7 @@ struct LogRouterData {
             return int64_t(1000 * val);
         });
         specialCounter(cc, "Generation", [this]() { return this->generation; });
+        specialCounter(cc, "ActivePeekStreams", [this]() { return this->activePeekStreams; });
         logger = traceCounters("LogRouterMetrics",
                                dbgid,
                                SERVER_KNOBS->WORKER_LOGGING_INTERVAL,
@@ -404,18 +406,15 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>>& get_version_messages(Lo
     return tagData->version_messages;
 };

-void peekMessagesFromMemory(LogRouterData* self,
-                            TLogPeekRequest const& req,
-                            BinaryWriter& messages,
-                            Version& endVersion) {
+void peekMessagesFromMemory(LogRouterData* self, Tag tag, Version begin, BinaryWriter& messages, Version& endVersion) {
     ASSERT(!messages.getLength());

-    auto& deque = get_version_messages(self, req.tag);
+    auto& deque = get_version_messages(self, tag);
     //TraceEvent("TLogPeekMem", self->dbgid).detail("Tag", req.tag1).detail("PDS", self->persistentDataSequence).detail("PDDS", self->persistentDataDurableSequence).detail("Oldest", map1.empty() ? 0 : map1.begin()->key ).detail("OldestMsgCount", map1.empty() ? 0 : map1.begin()->value.size());

     auto it = std::lower_bound(deque.begin(),
                                deque.end(),
-                               std::make_pair(req.begin, LengthPrefixedStringRef()),
+                               std::make_pair(begin, LengthPrefixedStringRef()),
                                CompareFirst<std::pair<Version, LengthPrefixedStringRef>>());

     Version currentVersion = -1;
@@ -442,22 +441,30 @@ Version poppedVersion(LogRouterData* self, Tag tag) {
     return tagData->popped;
 }

-ACTOR Future<Void> logRouterPeekMessages(LogRouterData* self, TLogPeekRequest req) {
+// Common logic to peek the TLog and create a TLogPeekReply, serving both streaming and non-streaming peek requests
+ACTOR template <typename PromiseType>
+Future<Void> logRouterPeekMessages(PromiseType replyPromise,
+                                   LogRouterData* self,
+                                   Version reqBegin,
+                                   Tag reqTag,
+                                   bool reqReturnIfBlocked = false,
+                                   bool reqOnlySpilled = false,
+                                   Optional<std::pair<UID, int>> reqSequence = Optional<std::pair<UID, int>>()) {
     state BinaryWriter messages(Unversioned());
     state int sequence = -1;
     state UID peekId;

-    if (req.sequence.present()) {
+    if (reqSequence.present()) {
         try {
-            peekId = req.sequence.get().first;
-            sequence = req.sequence.get().second;
+            peekId = reqSequence.get().first;
+            sequence = reqSequence.get().second;
             if (sequence >= SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS &&
                 self->peekTracker.find(peekId) == self->peekTracker.end()) {
                 throw operation_obsolete();
             }
             auto& trackerData = self->peekTracker[peekId];
             if (sequence == 0 && trackerData.sequence_version.find(0) == trackerData.sequence_version.end()) {
-                trackerData.sequence_version[0].send(std::make_pair(req.begin, req.onlySpilled));
+                trackerData.sequence_version[0].send(std::make_pair(reqBegin, reqOnlySpilled));
             }
             auto seqBegin = trackerData.sequence_version.begin();
             // The peek cursor and this comparison need to agree about the maximum number of in-flight requests.
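
Templating on `PromiseType` lets one actor body serve both the one-shot `ReplyPromise<TLogPeekReply>` carried by a plain peek request and the local `Promise<TLogPeekReply>` that the streaming path passes in; the template only requires that the type expose `send` and `sendError`. A compact standalone illustration of that duck-typed pattern (toy types, not the flow classes):

    #include <iostream>
    #include <string>

    // Two unrelated "promise-like" types with the same send/sendError surface.
    struct OneShotReply {
        void send(const std::string& v) { std::cout << "reply: " << v << "\n"; }
        void sendError(const std::string& e) { std::cout << "error: " << e << "\n"; }
    };

    struct StreamStepReply {
        void send(const std::string& v) { std::cout << "stream element: " << v << "\n"; }
        void sendError(const std::string& e) { std::cout << "stream error: " << e << "\n"; }
    };

    // One implementation serves both callers, exactly like the templated peek actor.
    template <typename PromiseType>
    void peek(PromiseType replyPromise, bool blocked) {
        if (blocked)
            replyPromise.sendError("end_of_stream");
        else
            replyPromise.send("messages...");
    }

    int main() {
        peek(OneShotReply{}, false);   // normal request/response path
        peek(StreamStepReply{}, true); // one step of the streaming path
    }
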
@@ -476,12 +483,12 @@ ACTOR Future<Void> logRouterPeekMessages(LogRouterData* self, TLogPeekRequest re

             trackerData.lastUpdate = now();
             std::pair<Version, bool> prevPeekData = wait(trackerData.sequence_version[sequence].getFuture());
-            req.begin = prevPeekData.first;
-            req.onlySpilled = prevPeekData.second;
+            reqBegin = prevPeekData.first;
+            reqOnlySpilled = prevPeekData.second;
             wait(yield());
         } catch (Error& e) {
             if (e.code() == error_code_timed_out || e.code() == error_code_operation_obsolete) {
-                req.reply.sendError(e);
+                replyPromise.sendError(e);
                 return Void();
             } else {
                 throw;
@@ -489,62 +496,62 @@ ACTOR Future<Void> logRouterPeekMessages(LogRouterData* self, TLogPeekRequest re
         }
     }

-    //TraceEvent("LogRouterPeek1", self->dbgid).detail("From", req.reply.getEndpoint().getPrimaryAddress()).detail("Ver", self->version.get()).detail("Begin", req.begin);
-    if (req.returnIfBlocked && self->version.get() < req.begin) {
+    //TraceEvent("LogRouterPeek1", self->dbgid).detail("From", replyPromise.getEndpoint().getPrimaryAddress()).detail("Ver", self->version.get()).detail("Begin", reqBegin);
+    if (reqReturnIfBlocked && self->version.get() < reqBegin) {
         //TraceEvent("LogRouterPeek2", self->dbgid);
-        req.reply.sendError(end_of_stream());
-        if (req.sequence.present()) {
+        replyPromise.sendError(end_of_stream());
+        if (reqSequence.present()) {
             auto& trackerData = self->peekTracker[peekId];
             auto& sequenceData = trackerData.sequence_version[sequence + 1];
             if (!sequenceData.isSet()) {
-                sequenceData.send(std::make_pair(req.begin, req.onlySpilled));
+                sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled));
             }
         }
         return Void();
     }

-    if (self->version.get() < req.begin) {
-        wait(self->version.whenAtLeast(req.begin));
+    if (self->version.get() < reqBegin) {
+        wait(self->version.whenAtLeast(reqBegin));
         wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
     }

-    Version poppedVer = poppedVersion(self, req.tag);
+    Version poppedVer = poppedVersion(self, reqTag);

-    if (poppedVer > req.begin || req.begin < self->startVersion) {
+    if (poppedVer > reqBegin || reqBegin < self->startVersion) {
         // This should only happen if a packet is sent multiple times and the reply is not needed.
         // Since we are using popped differently, do not send a reply.
         TraceEvent(SevWarnAlways, "LogRouterPeekPopped", self->dbgid)
-            .detail("Begin", req.begin)
+            .detail("Begin", reqBegin)
             .detail("Popped", poppedVer)
             .detail("Start", self->startVersion);
-        req.reply.send(Never());
-        if (req.sequence.present()) {
+        replyPromise.send(Never());
+        if (reqSequence.present()) {
             auto& trackerData = self->peekTracker[peekId];
             auto& sequenceData = trackerData.sequence_version[sequence + 1];
             if (!sequenceData.isSet()) {
-                sequenceData.send(std::make_pair(req.begin, req.onlySpilled));
+                sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled));
             }
         }
         return Void();
     }

     Version endVersion = self->version.get() + 1;
-    peekMessagesFromMemory(self, req, messages, endVersion);
+    peekMessagesFromMemory(self, reqTag, reqBegin, messages, endVersion);

     TLogPeekReply reply;
     reply.maxKnownVersion = self->version.get();
     reply.minKnownCommittedVersion = self->poppedVersion;
-    reply.messages = messages.toValue();
+    reply.messages = StringRef(reply.arena, messages.toValue());
     reply.popped = self->minPopped.get() >= self->startVersion ? self->minPopped.get() : 0;
     reply.end = endVersion;
     reply.onlySpilled = false;

-    if (req.sequence.present()) {
+    if (reqSequence.present()) {
         auto& trackerData = self->peekTracker[peekId];
         trackerData.lastUpdate = now();
         auto& sequenceData = trackerData.sequence_version[sequence + 1];
         if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
-            req.reply.sendError(operation_obsolete());
+            replyPromise.sendError(operation_obsolete());
             if (!sequenceData.isSet())
                 sequenceData.sendError(operation_obsolete());
             return Void();
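
The switch to `StringRef(reply.arena, messages.toValue())` deep-copies the serialized bytes into the reply's own arena, so the payload's lifetime is tied to the reply object rather than to the temporary `BinaryWriter` buffer; that matters once the same reply type travels through a long-lived stream. A small sketch of the idea using a simplified arena-owned copy (flow's real Arena/StringRef differ in detail):

    #include <cassert>
    #include <cstddef>
    #include <cstring>
    #include <deque>
    #include <string>

    // Toy arena: owns byte blocks; views handed out stay valid while it lives.
    struct Arena {
        std::deque<std::string> blocks; // deque: stable references on growth
        const char* copyInto(const char* data, std::size_t len) {
            blocks.emplace_back(data, len); // deep copy into arena-owned storage
            return blocks.back().data();
        }
    };

    struct Reply {
        Arena arena;
        const char* messages = nullptr; // must point into `arena`, not a temporary
    };

    int main() {
        Reply reply;
        {
            std::string scratch = "serialized messages"; // temporary serialization buffer
            reply.messages = reply.arena.copyInto(scratch.data(), scratch.size());
        } // scratch destroyed here
        assert(std::strcmp(reply.messages, "serialized messages") == 0); // still valid
    }
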
@@ -552,20 +559,60 @@ ACTOR Future<Void> logRouterPeekMessages(LogRouterData* self, TLogPeekRequest re
         if (sequenceData.isSet()) {
             if (sequenceData.getFuture().get().first != reply.end) {
                 TEST(true); // tlog peek second attempt ended at a different version
-                req.reply.sendError(operation_obsolete());
+                replyPromise.sendError(operation_obsolete());
                 return Void();
             }
         } else {
             sequenceData.send(std::make_pair(reply.end, reply.onlySpilled));
         }
-        reply.begin = req.begin;
+        reply.begin = reqBegin;
     }

-    req.reply.send(reply);
+    replyPromise.send(reply);
     //TraceEvent("LogRouterPeek4", self->dbgid);
     return Void();
 }

+// This actor keeps pushing TLogPeekStreamReply until it's removed from the cluster or should recover
+ACTOR Future<Void> logRouterPeekStream(LogRouterData* self, TLogPeekStreamRequest req) {
+    self->activePeekStreams++;
+
+    state Version begin = req.begin;
+    state bool onlySpilled = false;
+    req.reply.setByteLimit(std::min(SERVER_KNOBS->MAXIMUM_PEEK_BYTES, req.limitBytes));
+    loop {
+        state TLogPeekStreamReply reply;
+        state Promise<TLogPeekReply> promise;
+        state Future<TLogPeekReply> future(promise.getFuture());
+        try {
+            wait(req.reply.onReady() && store(reply.rep, future) &&
+                 logRouterPeekMessages(promise, self, begin, req.tag, req.returnIfBlocked, onlySpilled));
+
+            reply.rep.begin = begin;
+            req.reply.send(reply);
+            begin = reply.rep.end;
+            onlySpilled = reply.rep.onlySpilled;
+            if (reply.rep.end > self->version.get()) {
+                wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
+            } else {
+                wait(delay(0, g_network->getCurrentTask()));
+            }
+        } catch (Error& e) {
+            self->activePeekStreams--;
+            TraceEvent(SevDebug, "TLogPeekStreamEnd", self->dbgid)
+                .detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
+                .error(e, true);
+
+            if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
+                req.reply.sendError(e);
+                return Void();
+            } else {
+                throw;
+            }
+        }
+    }
+}

 ACTOR Future<Void> cleanupPeekTrackers(LogRouterData* self) {
     loop {
         double minTimeUntilExpiration = SERVER_KNOBS->PEEK_TRACKER_EXPIRATION_TIME;
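
Note how the stream loop reuses the templated actor: it hands `logRouterPeekMessages` a fresh local `Promise<TLogPeekReply>`, waits for the value through the paired future, then wraps it in a `TLogPeekStreamReply` and forwards it down the stream. The same adapter shape in standard C++ (std::promise standing in for flow's Promise):

    #include <future>
    #include <iostream>
    #include <string>

    // The shared "peek" logic only knows how to fulfill a promise.
    void peekOnce(std::promise<std::string> reply, int begin) {
        reply.set_value("messages from version " + std::to_string(begin));
    }

    int main() {
        int begin = 5;
        for (int i = 0; i < 3; ++i) {
            std::promise<std::string> promise; // local promise per iteration
            std::future<std::string> future = promise.get_future();
            peekOnce(std::move(promise), begin); // common code fills the promise
            // Wrap the one-shot result as one element of the outgoing stream.
            std::cout << "stream element " << i << ": " << future.get() << "\n";
            ++begin; // advance the cursor for the next round
        }
    }
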
@@ -643,7 +690,13 @@ ACTOR Future<Void> logRouterCore(TLogInterface interf,
             logRouterData.logSystem->set(ILogSystem::fromServerDBInfo(logRouterData.dbgid, db->get(), true));
         }
         when(TLogPeekRequest req = waitNext(interf.peekMessages.getFuture())) {
-            addActor.send(logRouterPeekMessages(&logRouterData, req));
+            addActor.send(logRouterPeekMessages(
+                req.reply, &logRouterData, req.begin, req.tag, req.returnIfBlocked, req.onlySpilled, req.sequence));
+        }
+        when(TLogPeekStreamRequest req = waitNext(interf.peekStreamMessages.getFuture())) {
+            TraceEvent(SevDebug, "LogRouterPeekStream", logRouterData.dbgid)
+                .detail("Token", interf.peekStreamMessages.getEndpoint().token);
+            addActor.send(logRouterPeekStream(&logRouterData, req));
         }
         when(TLogPopRequest req = waitNext(interf.popMessages.getFuture())) {
             // Request from remote tLog to pop data from LR
@@ -427,7 +427,7 @@ struct ILogSystem {

         TLogPeekReply results;
         ArenaReader rd;
-        LogMessageVersion messageVersion, end;
+        LogMessageVersion messageVersion, end; // the version of the current message; the intended end version of the current cursor
         Version poppedVersion;
         TagsAndMessage messageAndTags;
         bool hasMsg;

@@ -437,9 +437,11 @@ struct ILogSystem {

         bool onlySpilled;
         bool parallelGetMore;
+        bool usePeekStream;
         int sequence;
         Deque<Future<TLogPeekReply>> futureResults;
         Future<Void> interfaceChanged;
+        Optional<ReplyPromiseStream<TLogPeekStreamReply>> peekReplyStream;

         double lastReset;
         Future<Void> resetCheck;
@@ -25,6 +25,24 @@
 #include "fdbrpc/ReplicationUtils.h"
 #include "flow/actorcompiler.h" // has to be last include

+// create a peek stream for the cursor when possible
+ACTOR Future<Void> tryEstablishPeekStream(ILogSystem::ServerPeekCursor* self) {
+    if (self->peekReplyStream.present())
+        return Void();
+    else if (!self->interf || !self->interf->get().present()) {
+        self->peekReplyStream.reset();
+        return Never();
+    }
+    wait(IFailureMonitor::failureMonitor().onStateEqual(self->interf->get().interf().peekStreamMessages.getEndpoint(),
+                                                        FailureStatus(false)));
+    self->peekReplyStream = self->interf->get().interf().peekStreamMessages.getReplyStream(TLogPeekStreamRequest(
+        self->messageVersion.version, self->tag, self->returnIfBlocked, std::numeric_limits<int>::max()));
+    TraceEvent(SevDebug, "SPC_StreamCreated", self->randomID)
+        .detail("PeerAddr", self->interf->get().interf().peekStreamMessages.getEndpoint().getPrimaryAddress())
+        .detail("PeerToken", self->interf->get().interf().peekStreamMessages.getEndpoint().token);
+    return Void();
+}

 ILogSystem::ServerPeekCursor::ServerPeekCursor(Reference<AsyncVar<OptionalInterface<TLogInterface>>> const& interf,
                                                Tag tag,
                                                Version begin,

@@ -33,11 +51,15 @@ ILogSystem::ServerPeekCursor::ServerPeekCursor(Reference<AsyncVar<OptionalInterf
                                                bool parallelGetMore)
   : interf(interf), tag(tag), rd(results.arena, results.messages, Unversioned()), messageVersion(begin), end(end),
     poppedVersion(0), hasMsg(false), randomID(deterministicRandom()->randomUniqueID()),
-    returnIfBlocked(returnIfBlocked), onlySpilled(false), parallelGetMore(parallelGetMore), sequence(0), lastReset(0),
-    resetCheck(Void()), slowReplies(0), fastReplies(0), unknownReplies(0) {
+    returnIfBlocked(returnIfBlocked), onlySpilled(false), parallelGetMore(parallelGetMore),
+    usePeekStream(SERVER_KNOBS->PEEK_USING_STREAMING), sequence(0), lastReset(0), resetCheck(Void()), slowReplies(0),
+    fastReplies(0), unknownReplies(0) {
     this->results.maxKnownVersion = 0;
     this->results.minKnownCommittedVersion = 0;
-    //TraceEvent("SPC_Starting", randomID).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).backtrace();
+    DisabledTraceEvent(SevDebug, "SPC_Starting", randomID)
+        .detail("Tag", tag.toString())
+        .detail("Begin", begin)
+        .detail("End", end);
 }
@@ -50,8 +72,8 @@ ILogSystem::ServerPeekCursor::ServerPeekCursor(TLogPeekReply const& results,
   : tag(tag), results(results), rd(results.arena, results.messages, Unversioned()), messageVersion(messageVersion),
     end(end), poppedVersion(poppedVersion), messageAndTags(message), hasMsg(hasMsg),
     randomID(deterministicRandom()->randomUniqueID()), returnIfBlocked(false), onlySpilled(false),
-    parallelGetMore(false), sequence(0), lastReset(0), resetCheck(Void()), slowReplies(0), fastReplies(0),
-    unknownReplies(0) {
+    parallelGetMore(false), usePeekStream(false), sequence(0), lastReset(0), resetCheck(Void()), slowReplies(0),
+    fastReplies(0), unknownReplies(0) {
     //TraceEvent("SPC_Clone", randomID);
     this->results.maxKnownVersion = 0;
     this->results.minKnownCommittedVersion = 0;

@@ -153,6 +175,20 @@ void ILogSystem::ServerPeekCursor::advanceTo(LogMessageVersion n) {
     }
 }

+// This function is called after the cursor receives one TLogPeekReply, to update its members; it is the common logic
+// shared by the getMore helper functions.
+void updateCursorWithReply(ILogSystem::ServerPeekCursor* self, const TLogPeekReply& res) {
+    self->results = res;
+    self->onlySpilled = res.onlySpilled;
+    if (res.popped.present())
+        self->poppedVersion = std::min(std::max(self->poppedVersion, res.popped.get()), self->end.version);
+    self->rd = ArenaReader(self->results.arena, self->results.messages, Unversioned());
+    LogMessageVersion skipSeq = self->messageVersion;
+    self->hasMsg = true;
+    self->nextMessage();
+    self->advanceTo(skipSeq);
+}

 ACTOR Future<Void> resetChecker(ILogSystem::ServerPeekCursor* self, NetworkAddress addr) {
     self->slowReplies = 0;
     self->unknownReplies = 0;
@@ -208,11 +244,10 @@ ACTOR Future<TLogPeekReply> recordRequestMetrics(ILogSystem::ServerPeekCursor* s
 }

 ACTOR Future<Void> serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self, TaskPriority taskID) {
-    if (!self->interf || self->messageVersion >= self->end) {
+    if (!self->interf || self->isExhausted()) {
         if (self->hasMessage())
             return Void();
-        wait(Future<Void>(Never()));
-        throw internal_error();
+        return Never();
     }

     if (!self->interfaceChanged.isValid()) {

@@ -253,16 +288,7 @@ ACTOR Future<Void> serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self,
                     }
                     expectedBegin = res.end;
                     self->futureResults.pop_front();
-                    self->results = res;
-                    self->onlySpilled = res.onlySpilled;
-                    if (res.popped.present())
-                        self->poppedVersion =
-                            std::min(std::max(self->poppedVersion, res.popped.get()), self->end.version);
-                    self->rd = ArenaReader(self->results.arena, self->results.messages, Unversioned());
-                    LogMessageVersion skipSeq = self->messageVersion;
-                    self->hasMsg = true;
-                    self->nextMessage();
-                    self->advanceTo(skipSeq);
+                    updateCursorWithReply(self, res);
                     //TraceEvent("SPC_GetMoreB", self->randomID).detail("Has", self->hasMessage()).detail("End", res.end).detail("Popped", res.popped.present() ? res.popped.get() : 0);
                     return Void();
                 }
@@ -296,10 +322,70 @@ ACTOR Future<Void> serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self,
         }
     }

+ACTOR Future<Void> serverPeekStreamGetMore(ILogSystem::ServerPeekCursor* self, TaskPriority taskID) {
+    if (!self->interf || self->isExhausted()) {
+        self->peekReplyStream.reset();
+        if (self->hasMessage())
+            return Void();
+        return Never();
+    }
+
+    loop {
+        try {
+            state Version expectedBegin = self->messageVersion.version;
+            state Future<TLogPeekReply> fPeekReply = self->peekReplyStream.present()
+                                                         ? map(waitAndForward(self->peekReplyStream.get().getFuture()),
+                                                               [](const TLogPeekStreamReply& r) { return r.rep; })
+                                                         : Never();
+            choose {
+                when(wait(self->peekReplyStream.present() ? Never() : tryEstablishPeekStream(self))) {}
+                when(wait(self->interf->onChange())) {
+                    self->onlySpilled = false;
+                    self->peekReplyStream.reset();
+                }
+                when(TLogPeekReply res = wait(
+                         self->peekReplyStream.present()
+                             ? recordRequestMetrics(
+                                   self,
+                                   self->interf->get().interf().peekStreamMessages.getEndpoint().getPrimaryAddress(),
+                                   fPeekReply)
+                             : Never())) {
+                    if (res.begin.get() != expectedBegin) {
+                        throw operation_obsolete();
+                    }
+                    updateCursorWithReply(self, res);
+                    expectedBegin = res.end;
+                    DisabledTraceEvent(SevDebug, "SPC_GetMoreB", self->randomID)
+                        .detail("Has", self->hasMessage())
+                        .detail("End", res.end)
+                        .detail("Popped", res.popped.present() ? res.popped.get() : 0);
+
+                    // NOTE: delay is necessary here since ReplyPromiseStream delivers replies at high priority;
+                    // this switches back to the intended priority.
+                    wait(delay(0, taskID));
+                    return Void();
+                }
+            }
+        } catch (Error& e) {
+            DisabledTraceEvent(SevDebug, "SPC_GetMoreB_Error", self->randomID).error(e, true);
+            if (e.code() == error_code_connection_failed || e.code() == error_code_operation_obsolete) {
+                // NOTE: delay so that the endless retry loop does not block other tasks
+                self->peekReplyStream.reset();
+                wait(delay(0));
+            } else if (e.code() == error_code_end_of_stream) {
+                self->peekReplyStream.reset();
+                self->end.reset(self->messageVersion.version);
+                return Void();
+            } else {
+                throw;
+            }
+        }
+    }
+}

 ACTOR Future<Void> serverPeekGetMore(ILogSystem::ServerPeekCursor* self, TaskPriority taskID) {
-    if (!self->interf || self->messageVersion >= self->end) {
-        wait(Future<Void>(Never()));
-        throw internal_error();
+    if (!self->interf || self->isExhausted()) {
+        return Never();
     }
     try {
         loop {
@@ -313,16 +399,7 @@ ACTOR Future<Void> serverPeekGetMore(ILogSystem::ServerPeekCursor* self, TaskPri
                                               self->onlySpilled),
                               taskID))
                     : Never())) {
-                self->results = res;
-                self->onlySpilled = res.onlySpilled;
-                if (res.popped.present())
-                    self->poppedVersion =
-                        std::min(std::max(self->poppedVersion, res.popped.get()), self->end.version);
-                self->rd = ArenaReader(self->results.arena, self->results.messages, Unversioned());
-                LogMessageVersion skipSeq = self->messageVersion;
-                self->hasMsg = true;
-                self->nextMessage();
-                self->advanceTo(skipSeq);
+                updateCursorWithReply(self, res);
                 //TraceEvent("SPC_GetMoreB", self->randomID).detail("Has", self->hasMessage()).detail("End", res.end).detail("Popped", res.popped.present() ? res.popped.get() : 0);
                 return Void();
             }

@@ -339,11 +416,17 @@ ACTOR Future<Void> serverPeekGetMore(ILogSystem::ServerPeekCursor* self, TaskPri
 }

 Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
-    //TraceEvent("SPC_GetMore", randomID).detail("HasMessage", hasMessage()).detail("More", !more.isValid() || more.isReady()).detail("MessageVersion", messageVersion.toString()).detail("End", end.toString());
+    // TraceEvent("SPC_GetMore", randomID)
+    //     .detail("HasMessage", hasMessage())
+    //     .detail("More", !more.isValid() || more.isReady())
+    //     .detail("MessageVersion", messageVersion.toString())
+    //     .detail("End", end.toString());
     if (hasMessage() && !parallelGetMore)
         return Void();
     if (!more.isValid() || more.isReady()) {
-        if (parallelGetMore || onlySpilled || futureResults.size()) {
+        if (usePeekStream &&
+            (tag.locality >= 0 || tag.locality == tagLocalityLogRouter || tag.locality == tagLocalityRemoteLog)) {
+            more = serverPeekStreamGetMore(this, taskID);
+        } else if (parallelGetMore || onlySpilled || futureResults.size()) {
             more = serverPeekParallelGetMore(this, taskID);
         } else {
             more = serverPeekGetMore(this, taskID);
@@ -361,6 +444,12 @@ ACTOR Future<Void> serverPeekOnFailed(ILogSystem::ServerPeekCursor* self) {
                  : Never())) {
             return Void();
         }
+        when(wait(self->interf->get().present()
+                      ? IFailureMonitor::failureMonitor().onStateEqual(
+                            self->interf->get().interf().peekStreamMessages.getEndpoint(), FailureStatus())
+                      : Never())) {
+            return Void();
+        }
         when(wait(self->interf->onChange())) {}
     }
 }

@@ -373,9 +462,14 @@ Future<Void> ILogSystem::ServerPeekCursor::onFailed() {
 bool ILogSystem::ServerPeekCursor::isActive() const {
     if (!interf->get().present())
         return false;
-    if (messageVersion >= end)
+    if (isExhausted())
         return false;
-    return IFailureMonitor::failureMonitor().getState(interf->get().interf().peekMessages.getEndpoint()).isAvailable();
+    return IFailureMonitor::failureMonitor()
+               .getState(interf->get().interf().peekMessages.getEndpoint())
+               .isAvailable() &&
+           IFailureMonitor::failureMonitor()
+               .getState(interf->get().interf().peekStreamMessages.getEndpoint())
+               .isAvailable();
 }

 bool ILogSystem::ServerPeekCursor::isExhausted() const {
@@ -798,8 +798,7 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,

                 // Wait for a durable quorum of servers in destServers to have keys available (readWrite)
                 // They must also have at least the transaction read version so they can't "forget" the shard
-                // between
-                // now and when this transaction commits.
+                // between now and when this transaction commits.
                 state vector<Future<Void>> serverReady; // only for count below
                 state vector<Future<Void>> tssReady; // for waiting in parallel with tss
                 state vector<StorageServerInterface> tssReadyInterfs;
@@ -300,6 +300,7 @@ struct TLogData : NonCopyable {
     int64_t instanceID;
     int64_t bytesInput;
     int64_t bytesDurable;
+    int activePeekStreams = 0;

     Version prevVersion;

@@ -477,6 +478,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
         });
         specialCounter(
             cc, "QueueDiskBytesTotal", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().total; });
+        specialCounter(cc, "ActivePeekStreams", [tLogData]() { return tLogData->activePeekStreams; });
     }

     ~LogData() {
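
`specialCounter` registers a lambda-backed gauge: instead of incrementing a stored counter, the metric is sampled by invoking the closure each time the counter set is logged, so `ActivePeekStreams` always reports the live value. A generic sketch of the pattern (plain C++, with a hypothetical registry standing in for the counter collection):

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // A gauge registry: each metric is a name plus a closure sampled at log time.
    struct Metrics {
        std::vector<std::pair<std::string, std::function<int64_t()>>> gauges;
        void specialCounter(std::string name, std::function<int64_t()> f) {
            gauges.emplace_back(std::move(name), std::move(f));
        }
        void log() const {
            for (const auto& [name, f] : gauges)
                std::cout << name << "=" << f() << "\n"; // sampled fresh on every log
        }
    };

    struct Server {
        int activePeekStreams = 0;
    };

    int main() {
        Server server;
        Metrics metrics;
        metrics.specialCounter("ActivePeekStreams", [&server] { return (int64_t)server.activePeekStreams; });
        server.activePeekStreams = 3; // mutate after registration
        metrics.log(); // prints ActivePeekStreams=3, the current value
    }
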
@@ -931,14 +933,15 @@ ACTOR Future<Void> tLogPop(TLogData* self, TLogPopRequest req, Reference<LogData
 }

 void peekMessagesFromMemory(Reference<LogData> self,
-                            TLogPeekRequest const& req,
+                            Tag tag,
+                            Version reqBegin,
                             BinaryWriter& messages,
                             Version& endVersion) {
-    OldTag oldTag = convertTag(req.tag);
+    OldTag oldTag = convertTag(tag);
     ASSERT(!messages.getLength());

     auto& deque = get_version_messages(self, oldTag);
-    Version begin = std::max(req.begin, self->persistentDataDurableVersion + 1);
+    Version begin = std::max(reqBegin, self->persistentDataDurableVersion + 1);
     auto it = std::lower_bound(deque.begin(),
                                deque.end(),
                                std::make_pair(begin, LengthPrefixedStringRef()),
@@ -963,24 +966,33 @@ void peekMessagesFromMemory(Reference<LogData> self,
             uint32_t subVersion;
             rd >> messageLength >> subVersion;
             messageLength += sizeof(uint16_t) + sizeof(Tag);
-            messages << messageLength << subVersion << uint16_t(1) << req.tag;
+            messages << messageLength << subVersion << uint16_t(1) << tag;
             messageLength -= (sizeof(subVersion) + sizeof(uint16_t) + sizeof(Tag));
             messages.serializeBytes(rd.readBytes(messageLength), messageLength);
         }
     }
 }

-ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Reference<LogData> logData) {
+// Common logic to peek the TLog and create a TLogPeekReply, serving both streaming and non-streaming peek requests
+ACTOR template <typename PromiseType>
+Future<Void> tLogPeekMessages(PromiseType replyPromise,
+                              TLogData* self,
+                              Reference<LogData> logData,
+                              Version reqBegin,
+                              Tag reqTag,
+                              bool reqReturnIfBlocked = false,
+                              bool reqOnlySpilled = false,
+                              Optional<std::pair<UID, int>> reqSequence = Optional<std::pair<UID, int>>()) {
     state BinaryWriter messages(Unversioned());
     state BinaryWriter messages2(Unversioned());
     state int sequence = -1;
     state UID peekId;
-    state OldTag oldTag = convertTag(req.tag);
+    state OldTag oldTag = convertTag(reqTag);

-    if (req.sequence.present()) {
+    if (reqSequence.present()) {
         try {
-            peekId = req.sequence.get().first;
-            sequence = req.sequence.get().second;
+            peekId = reqSequence.get().first;
+            sequence = reqSequence.get().second;
             if (sequence >= SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS &&
                 self->peekTracker.find(peekId) == self->peekTracker.end()) {
                 throw operation_obsolete();
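
The length arithmetic above re-frames each stored message for the reply: on disk the record is [length][subVersion][payload], while the wire format written here is [length][subVersion][tagCount][tag][payload], so the written length grows by sizeof(uint16_t) + sizeof(Tag), and the bytes still to copy shrink back to just the payload once the already-consumed header fields are subtracted. A worked check of the byte counts (standalone C++; the sizes are illustrative assumptions, e.g. an 8-byte Tag):

    #include <cassert>
    #include <cstdint>

    int main() {
        // Hypothetical sizes for illustration only.
        const uint32_t sizeofSubVersion = sizeof(uint32_t); // 4
        const uint32_t sizeofTagCount = sizeof(uint16_t); // 2
        const uint32_t sizeofTag = 8; // assume Tag serializes to 8 bytes

        // Stored record: length counts subVersion + payload.
        uint32_t payload = 100;
        uint32_t storedLength = sizeofSubVersion + payload; // 104

        // Outgoing record adds a tag count and one tag after subVersion.
        uint32_t wireLength = storedLength + sizeofTagCount + sizeofTag; // 114

        // Bytes still to copy from the stored record: exactly the payload.
        uint32_t remaining = wireLength - (sizeofSubVersion + sizeofTagCount + sizeofTag); // 100
        assert(remaining == payload);
    }
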
@@ -989,12 +1001,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
             auto& trackerData = self->peekTracker[peekId];
             trackerData.lastUpdate = now();
             Version ver = wait(trackerData.sequence_version[sequence].getFuture());
-            req.begin = std::max(ver, req.begin);
+            reqBegin = std::max(ver, reqBegin);
             wait(yield());
         }
     } catch (Error& e) {
         if (e.code() == error_code_timed_out || e.code() == error_code_operation_obsolete) {
-            req.reply.sendError(e);
+            replyPromise.sendError(e);
             return Void();
         } else {
             throw;
@@ -1002,22 +1014,22 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
         }
     }

-    if (req.returnIfBlocked && logData->version.get() < req.begin) {
-        req.reply.sendError(end_of_stream());
+    if (reqReturnIfBlocked && logData->version.get() < reqBegin) {
+        replyPromise.sendError(end_of_stream());
         return Void();
     }

-    //TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
+    //TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
     // Wait until we have something to return that the caller doesn't already have
-    if (logData->version.get() < req.begin) {
-        wait(logData->version.whenAtLeast(req.begin));
+    if (logData->version.get() < reqBegin) {
+        wait(logData->version.whenAtLeast(reqBegin));
         wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
     }

     state Version endVersion = logData->version.get() + 1;

     Version poppedVer = poppedVersion(logData, oldTag);
-    if (poppedVer > req.begin) {
+    if (poppedVer > reqBegin) {
         TLogPeekReply rep;
         rep.maxKnownVersion = logData->version.get();
         rep.minKnownCommittedVersion = 0;
@@ -1025,12 +1037,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
         rep.end = poppedVer;
         rep.onlySpilled = false;

-        if (req.sequence.present()) {
+        if (reqSequence.present()) {
             auto& trackerData = self->peekTracker[peekId];
             auto& sequenceData = trackerData.sequence_version[sequence + 1];
             trackerData.lastUpdate = now();
             if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
-                req.reply.sendError(operation_obsolete());
+                replyPromise.sendError(operation_obsolete());
                 if (!sequenceData.isSet())
                     sequenceData.sendError(operation_obsolete());
                 return Void();
@ -1038,37 +1050,37 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
if (sequenceData.isSet()) {
|
if (sequenceData.isSet()) {
|
||||||
if (sequenceData.getFuture().get() != rep.end) {
|
if (sequenceData.getFuture().get() != rep.end) {
|
||||||
TEST(true); // tlog peek second attempt ended at a different version
|
TEST(true); // tlog peek second attempt ended at a different version
|
||||||
req.reply.sendError(operation_obsolete());
|
replyPromise.sendError(operation_obsolete());
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
sequenceData.send(rep.end);
|
sequenceData.send(rep.end);
|
||||||
}
|
}
|
||||||
rep.begin = req.begin;
|
rep.begin = reqBegin;
|
||||||
}
|
}
|
||||||
|
|
||||||
req.reply.send(rep);
|
replyPromise.send(rep);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
// grab messages from disk
|
// grab messages from disk
|
||||||
//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
|
//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
|
||||||
if (req.begin <= logData->persistentDataDurableVersion) {
|
if (reqBegin <= logData->persistentDataDurableVersion) {
|
||||||
// Just in case the durable version changes while we are waiting for the read, we grab this data from memory. We
|
// Just in case the durable version changes while we are waiting for the read, we grab this data from memory. We
|
||||||
// may or may not actually send it depending on whether we get enough data from disk. SOMEDAY: Only do this if
|
// may or may not actually send it depending on whether we get enough data from disk. SOMEDAY: Only do this if
|
||||||
// an initial attempt to read from disk results in insufficient data and the required data is no longer in
|
// an initial attempt to read from disk results in insufficient data and the required data is no longer in
|
||||||
// memory SOMEDAY: Should we only send part of the messages we collected, to actually limit the size of the
|
// memory SOMEDAY: Should we only send part of the messages we collected, to actually limit the size of the
|
||||||
// result?
|
// result?
|
||||||
|
|
||||||
peekMessagesFromMemory(logData, req, messages2, endVersion);
|
peekMessagesFromMemory(logData, reqTag, reqBegin, messages2, endVersion);
|
||||||
|
|
||||||
RangeResult kvs = wait(self->persistentData->readRange(
|
RangeResult kvs = wait(self->persistentData->readRange(
|
||||||
KeyRangeRef(persistTagMessagesKey(logData->logId, oldTag, req.begin),
|
KeyRangeRef(persistTagMessagesKey(logData->logId, oldTag, reqBegin),
|
||||||
persistTagMessagesKey(logData->logId, oldTag, logData->persistentDataDurableVersion + 1)),
|
persistTagMessagesKey(logData->logId, oldTag, logData->persistentDataDurableVersion + 1)),
|
||||||
SERVER_KNOBS->DESIRED_TOTAL_BYTES,
|
SERVER_KNOBS->DESIRED_TOTAL_BYTES,
|
||||||
SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
||||||
|
|
||||||
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
|
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
|
||||||
|
|
||||||
for (auto& kv : kvs) {
|
for (auto& kv : kvs) {
|
||||||
auto ver = decodeTagMessagesKey(kv.key);
|
auto ver = decodeTagMessagesKey(kv.key);
|
||||||
|
@ -1080,7 +1092,7 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
uint32_t subVersion;
|
uint32_t subVersion;
|
||||||
rd >> messageLength >> subVersion;
|
rd >> messageLength >> subVersion;
|
||||||
messageLength += sizeof(uint16_t) + sizeof(Tag);
|
messageLength += sizeof(uint16_t) + sizeof(Tag);
|
||||||
messages << messageLength << subVersion << uint16_t(1) << req.tag;
|
messages << messageLength << subVersion << uint16_t(1) << reqTag;
|
||||||
messageLength -= (sizeof(subVersion) + sizeof(uint16_t) + sizeof(Tag));
|
messageLength -= (sizeof(subVersion) + sizeof(uint16_t) + sizeof(Tag));
|
||||||
messages.serializeBytes(rd.readBytes(messageLength), messageLength);
|
messages.serializeBytes(rd.readBytes(messageLength), messageLength);
|
||||||
}
|
}
|
||||||
|
@ -1091,39 +1103,79 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
else
|
else
|
||||||
messages.serializeBytes(messages2.toValue());
|
messages.serializeBytes(messages2.toValue());
|
||||||
} else {
|
} else {
|
||||||
peekMessagesFromMemory(logData, req, messages, endVersion);
|
peekMessagesFromMemory(logData, reqTag, reqBegin, messages, endVersion);
|
||||||
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
|
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
|
||||||
}
|
}
|
||||||
|
|
||||||
TLogPeekReply reply;
|
TLogPeekReply reply;
|
||||||
reply.maxKnownVersion = logData->version.get();
|
reply.maxKnownVersion = logData->version.get();
|
||||||
reply.minKnownCommittedVersion = 0;
|
reply.minKnownCommittedVersion = 0;
|
||||||
reply.onlySpilled = false;
|
reply.onlySpilled = false;
|
||||||
reply.messages = messages.toValue();
|
reply.messages = StringRef(reply.arena, messages.toValue());
|
||||||
reply.end = endVersion;
|
reply.end = endVersion;
|
||||||
|
|
||||||
//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("EndVer", reply.end).detail("MsgBytes", reply.messages.expectedSize()).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress());
|
//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("EndVer", reply.end).detail("MsgBytes", reply.messages.expectedSize()).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress());
|
||||||
|
|
||||||
if (req.sequence.present()) {
|
if (reqSequence.present()) {
|
||||||
auto& trackerData = self->peekTracker[peekId];
|
auto& trackerData = self->peekTracker[peekId];
|
||||||
trackerData.lastUpdate = now();
|
trackerData.lastUpdate = now();
|
||||||
auto& sequenceData = trackerData.sequence_version[sequence + 1];
|
auto& sequenceData = trackerData.sequence_version[sequence + 1];
|
||||||
if (sequenceData.isSet()) {
|
if (sequenceData.isSet()) {
|
||||||
if (sequenceData.getFuture().get() != reply.end) {
|
if (sequenceData.getFuture().get() != reply.end) {
|
||||||
TEST(true); // tlog peek second attempt ended at a different version (2)
|
TEST(true); // tlog peek second attempt ended at a different version (2)
|
||||||
req.reply.sendError(operation_obsolete());
|
replyPromise.sendError(operation_obsolete());
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
sequenceData.send(reply.end);
|
sequenceData.send(reply.end);
|
||||||
}
|
}
|
||||||
reply.begin = req.begin;
|
reply.begin = reqBegin;
|
||||||
}
|
}
|
||||||
|
|
||||||
req.reply.send(reply);
|
replyPromise.send(reply);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This actor keep pushing TLogPeekStreamReply until it's removed from the cluster or should recover
|
||||||
|
ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Reference<LogData> logData) {
|
||||||
|
self->activePeekStreams++;
|
||||||
|
|
||||||
|
state Version begin = req.begin;
|
||||||
|
state bool onlySpilled = false;
|
||||||
|
req.reply.setByteLimit(std::min(SERVER_KNOBS->MAXIMUM_PEEK_BYTES, req.limitBytes));
|
||||||
|
loop {
|
||||||
|
state TLogPeekStreamReply reply;
|
||||||
|
state Promise<TLogPeekReply> promise;
|
||||||
|
state Future<TLogPeekReply> future(promise.getFuture());
|
||||||
|
try {
|
||||||
|
wait(req.reply.onReady() && store(reply.rep, future) &&
|
||||||
|
tLogPeekMessages(promise, self, logData, begin, req.tag, req.returnIfBlocked, onlySpilled));
|
||||||
|
|
||||||
|
reply.rep.begin = begin;
|
||||||
|
req.reply.send(reply);
|
||||||
|
begin = reply.rep.end;
|
||||||
|
onlySpilled = reply.rep.onlySpilled;
|
||||||
|
if (reply.rep.end > logData->version.get()) {
|
||||||
|
wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
|
||||||
|
} else {
|
||||||
|
wait(delay(0, g_network->getCurrentTask()));
|
||||||
|
}
|
||||||
|
} catch (Error& e) {
|
||||||
|
self->activePeekStreams--;
|
||||||
|
TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
|
||||||
|
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
|
||||||
|
.error(e, true);
|
||||||
|
|
||||||
|
if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
|
||||||
|
req.reply.sendError(e);
|
||||||
|
return Void();
|
||||||
|
} else {
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
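The tLogPeekStream actor added above turns the one-shot peek into a push loop: each iteration runs the shared tLogPeekMessages body into a local Promise, forwards the batch on the reply stream, advances begin to the previous reply's end, and backs off by TLOG_PEEK_DELAY once it has caught up with the tlog's version. The following plain-C++ sketch shows the same control flow with the Flow machinery stripped out; LogView, Batch, and BatchSink are hypothetical stand-ins, not FoundationDB types.

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <thread>

using Version = int64_t;

struct Batch {
    Version begin = 0, end = 0; // half-open range of log versions this batch carries
    bool onlySpilled = false;
};

struct LogView { // hypothetical stand-in for LogData
    Version version = 100; // latest version the log knows about
    Batch peek(Version begin, bool /*onlySpilled*/) const {
        Batch b; // pretend we read everything from begin up to the current version
        b.begin = begin;
        b.end = std::max(begin, version);
        return b;
    }
};

struct BatchSink { // hypothetical stand-in for req.reply, the reply stream
    int sent = 0;
    bool send(const Batch&) { return ++sent < 5; } // peer departs after five batches
};

// Same control flow as tLogPeekStream: peek, push, advance, throttle when caught up.
void peekStream(const LogView& log, BatchSink& sink, Version begin) {
    bool onlySpilled = false;
    for (;;) {
        Batch b = log.peek(begin, onlySpilled);
        if (!sink.send(b))
            return; // stream closed; the real actor sees an error instead
        begin = b.end; // the next batch resumes exactly where this one ended
        onlySpilled = b.onlySpilled;
        if (b.end >= log.version) // caught up: back off (TLOG_PEEK_DELAY in the real code)
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
}

int main() {
    LogView log;
    BatchSink sink;
    peekStream(log, sink, 0);
}

Note the req.reply.onReady() in the actual actor: the next peek is not even started until the stream has buffer room, which is what bounds memory on a slow consumer.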

 ACTOR Future<Void> doQueueCommit(TLogData* self, Reference<LogData> logData) {
 state Version ver = logData->version.get();
 state Version commitNumber = self->queueCommitBegin + 1;
@@ -1288,7 +1340,13 @@ ACTOR Future<Void> serveTLogInterface(TLogData* self,
 PromiseStream<Void> warningCollectorInput) {
 loop choose {
 when(TLogPeekRequest req = waitNext(tli.peekMessages.getFuture())) {
-logData->addActor.send(tLogPeekMessages(self, req, logData));
+logData->addActor.send(tLogPeekMessages(
+req.reply, self, logData, req.begin, req.tag, req.returnIfBlocked, req.onlySpilled, req.sequence));
+}
+when(TLogPeekStreamRequest req = waitNext(tli.peekStreamMessages.getFuture())) {
+TraceEvent(SevDebug, "TLogPeekStream", logData->logId)
+.detail("Token", tli.peekStreamMessages.getEndpoint().token);
+logData->addActor.send(tLogPeekStream(self, req, logData));
 }
 when(TLogPopRequest req = waitNext(tli.popMessages.getFuture())) {
 logData->addActor.send(tLogPop(self, req, logData));
@@ -1435,6 +1493,7 @@ ACTOR Future<Void> restorePersistentState(TLogData* self, LocalityData locality)
 recruited.initEndpoints();

 DUMPTOKEN(recruited.peekMessages);
+DUMPTOKEN(recruited.peekStreamMessages);
 DUMPTOKEN(recruited.popMessages);
 DUMPTOKEN(recruited.commit);
 DUMPTOKEN(recruited.lock);
@@ -1574,7 +1633,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
 state TLogData self(tlogId, workerID, persistentData, persistentQueue, db);
 state Future<Void> error = actorCollection(self.sharedActors.getFuture());

-TraceEvent("SharedTlog", tlogId).log();
+TraceEvent("SharedTlog", tlogId).detail("Version", "4.6");

 try {
 wait(restorePersistentState(&self, locality));
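That concludes the first file of the diff (the 4.6-era TLog, judging by its SharedTlog Version detail). The signature change threaded through its hunks is the core of the refactor: tLogPeekMessages no longer receives a whole TLogPeekRequest, it receives any promise-like replyPromise plus the unpacked request fields, so the identical body can answer either a network ReplyPromise<TLogPeekReply> (the normal peek path in serveTLogInterface) or a local Promise<TLogPeekReply> held by tLogPeekStream. Below is a minimal sketch of that duck-typed unification in standard C++; PeekReply, LocalPromise, and NetworkPromise are invented stand-ins, and std::promise replaces Flow's futures.

#include <future>
#include <iostream>
#include <stdexcept>
#include <string>

struct PeekReply {
    long end = 0;
    std::string messages;
};

// Hypothetical local promise wrapper with the send/sendError duck type.
struct LocalPromise {
    std::promise<PeekReply> p;
    void send(PeekReply r) { p.set_value(std::move(r)); }
    void sendError(std::exception_ptr e) { p.set_exception(e); }
};

// Hypothetical "network" promise with the same surface; a real reply promise
// would serialize the reply back to a peer instead of printing it.
struct NetworkPromise {
    void send(const PeekReply& r) { std::cout << "reply end=" << r.end << "\n"; }
    void sendError(std::exception_ptr) { std::cout << "reply error\n"; }
};

// One body serves both callers, like the templated tLogPeekMessages: the
// promise type is a template parameter, and only send/sendError are used.
template <typename PromiseType>
void peekMessages(PromiseType&& reply, long begin, bool returnIfBlocked, long logVersion) {
    if (returnIfBlocked && logVersion < begin) {
        reply.sendError(std::make_exception_ptr(std::runtime_error("end_of_stream")));
        return;
    }
    reply.send(PeekReply{ logVersion + 1, "batch" });
}

int main() {
    NetworkPromise net; // the request/response path
    peekMessages(net, 5, false, 10);

    LocalPromise local; // the streaming path keeps the future for itself
    auto fut = local.p.get_future();
    peekMessages(local, 5, false, 10);
    std::cout << "stream got end=" << fut.get().end << "\n";
}

The same pattern repeats almost verbatim in the next two files of the diff.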
@@ -276,6 +276,7 @@ struct TLogData : NonCopyable {
 int64_t targetVolatileBytes; // The number of bytes of mutations this TLog should hold in memory before spilling.
 int64_t overheadBytesInput;
 int64_t overheadBytesDurable;
+int activePeekStreams = 0;

 WorkerCache<TLogInterface> tlogCache;

@@ -572,6 +573,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
 });
 specialCounter(
 cc, "QueueDiskBytesTotal", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().total; });
+specialCounter(cc, "ActivePeekStreams", [tLogData]() { return tLogData->activePeekStreams; });
 }

 ~LogData() {
@@ -1172,15 +1174,16 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>>& getVersionMessages(Refe
 };

 void peekMessagesFromMemory(Reference<LogData> self,
-TLogPeekRequest const& req,
+Tag tag,
+Version begin,
 BinaryWriter& messages,
 Version& endVersion) {
 ASSERT(!messages.getLength());

-auto& deque = getVersionMessages(self, req.tag);
+auto& deque = getVersionMessages(self, tag);
 //TraceEvent("TLogPeekMem", self->dbgid).detail("Tag", req.tag1).detail("PDS", self->persistentDataSequence).detail("PDDS", self->persistentDataDurableSequence).detail("Oldest", map1.empty() ? 0 : map1.begin()->key ).detail("OldestMsgCount", map1.empty() ? 0 : map1.begin()->value.size());

-Version begin = std::max(req.begin, self->persistentDataDurableVersion + 1);
+begin = std::max(begin, self->persistentDataDurableVersion + 1);
 auto it = std::lower_bound(deque.begin(),
 deque.end(),
 std::make_pair(begin, LengthPrefixedStringRef()),
@@ -1203,29 +1206,38 @@ void peekMessagesFromMemory(Reference<LogData> self,
 }
 }

-ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Reference<LogData> logData) {
+// Common logics to peek TLog and create TLogPeekReply that serves both streaming peek or normal peek request
+ACTOR template <typename PromiseType>
+Future<Void> tLogPeekMessages(PromiseType replyPromise,
+TLogData* self,
+Reference<LogData> logData,
+Version reqBegin,
+Tag reqTag,
+bool reqReturnIfBlocked = false,
+bool reqOnlySpilled = false,
+Optional<std::pair<UID, int>> reqSequence = Optional<std::pair<UID, int>>()) {
 state BinaryWriter messages(Unversioned());
 state BinaryWriter messages2(Unversioned());
 state int sequence = -1;
 state UID peekId;
 state double queueStart = now();

-if (req.tag.locality == tagLocalityTxs && req.tag.id >= logData->txsTags && logData->txsTags > 0) {
-req.tag.id = req.tag.id % logData->txsTags;
+if (reqTag.locality == tagLocalityTxs && reqTag.id >= logData->txsTags && logData->txsTags > 0) {
+reqTag.id = reqTag.id % logData->txsTags;
 }

-if (req.sequence.present()) {
+if (reqSequence.present()) {
 try {
-peekId = req.sequence.get().first;
-sequence = req.sequence.get().second;
+peekId = reqSequence.get().first;
+sequence = reqSequence.get().second;
 if (sequence >= SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS &&
 logData->peekTracker.find(peekId) == logData->peekTracker.end()) {
 throw operation_obsolete();
 }
 auto& trackerData = logData->peekTracker[peekId];
 if (sequence == 0 && trackerData.sequence_version.find(0) == trackerData.sequence_version.end()) {
-trackerData.tag = req.tag;
-trackerData.sequence_version[0].send(std::make_pair(req.begin, req.onlySpilled));
+trackerData.tag = reqTag;
+trackerData.sequence_version[0].send(std::make_pair(reqBegin, reqOnlySpilled));
 }
 auto seqBegin = trackerData.sequence_version.begin();
 while (trackerData.sequence_version.size() &&
@@ -1252,12 +1264,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 trackerData.lastUpdate = now();
 std::pair<Version, bool> prevPeekData = wait(fPrevPeekData);

-req.begin = std::max(prevPeekData.first, req.begin);
-req.onlySpilled = prevPeekData.second;
+reqBegin = std::max(prevPeekData.first, reqBegin);
+reqOnlySpilled = prevPeekData.second;
 wait(yield());
 } catch (Error& e) {
 if (e.code() == error_code_timed_out || e.code() == error_code_operation_obsolete) {
-req.reply.sendError(e);
+replyPromise.sendError(e);
 return Void();
 } else {
 throw;
@@ -1267,32 +1279,32 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

 state double blockStart = now();

-if (req.returnIfBlocked && logData->version.get() < req.begin) {
-req.reply.sendError(end_of_stream());
-if (req.sequence.present()) {
+if (reqReturnIfBlocked && logData->version.get() < reqBegin) {
+replyPromise.sendError(end_of_stream());
+if (reqSequence.present()) {
 auto& trackerData = logData->peekTracker[peekId];
 auto& sequenceData = trackerData.sequence_version[sequence + 1];
 if (!sequenceData.isSet()) {
-sequenceData.send(std::make_pair(req.begin, req.onlySpilled));
+sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled));
 }
 }
 return Void();
 }

-//TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
+//TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
 // Wait until we have something to return that the caller doesn't already have
-if (logData->version.get() < req.begin) {
-wait(logData->version.whenAtLeast(req.begin));
+if (logData->version.get() < reqBegin) {
+wait(logData->version.whenAtLeast(reqBegin));
 wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
 }

-if (logData->locality != tagLocalitySatellite && req.tag.locality == tagLocalityLogRouter) {
+if (logData->locality != tagLocalitySatellite && reqTag.locality == tagLocalityLogRouter) {
 wait(self->concurrentLogRouterReads.take());
 state FlowLock::Releaser globalReleaser(self->concurrentLogRouterReads);
 wait(delay(0.0, TaskPriority::Low));
 }

-if (req.begin <= logData->persistentDataDurableVersion && req.tag.locality != tagLocalityTxs && req.tag != txsTag) {
+if (reqBegin <= logData->persistentDataDurableVersion && reqTag.locality != tagLocalityTxs && reqTag != txsTag) {
 // Reading spilled data will almost always imply that the storage server is >5s behind the rest
 // of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
 // slightly faster over keeping the rest of the cluster operating normally.
@@ -1303,8 +1315,8 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

 state double workStart = now();

-Version poppedVer = poppedVersion(logData, req.tag);
-if (poppedVer > req.begin) {
+Version poppedVer = poppedVersion(logData, reqTag);
+if (poppedVer > reqBegin) {
 TLogPeekReply rep;
 rep.maxKnownVersion = logData->version.get();
 rep.minKnownCommittedVersion = logData->minKnownCommittedVersion;
@@ -1312,12 +1324,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 rep.end = poppedVer;
 rep.onlySpilled = false;

-if (req.sequence.present()) {
+if (reqSequence.present()) {
 auto& trackerData = logData->peekTracker[peekId];
 auto& sequenceData = trackerData.sequence_version[sequence + 1];
 trackerData.lastUpdate = now();
 if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
-req.reply.sendError(operation_obsolete());
+replyPromise.sendError(operation_obsolete());
 if (!sequenceData.isSet())
 sequenceData.sendError(operation_obsolete());
 return Void();
@@ -1325,16 +1337,16 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 if (sequenceData.isSet()) {
 if (sequenceData.getFuture().get().first != rep.end) {
 TEST(true); // tlog peek second attempt ended at a different version
-req.reply.sendError(operation_obsolete());
+replyPromise.sendError(operation_obsolete());
 return Void();
 }
 } else {
 sequenceData.send(std::make_pair(rep.end, rep.onlySpilled));
 }
-rep.begin = req.begin;
+rep.begin = reqBegin;
 }

-req.reply.send(rep);
+replyPromise.send(rep);
 return Void();
 }

@@ -1342,27 +1354,27 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 state bool onlySpilled = false;

 // grab messages from disk
-//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
-if (req.begin <= logData->persistentDataDurableVersion) {
+//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
+if (reqBegin <= logData->persistentDataDurableVersion) {
 // Just in case the durable version changes while we are waiting for the read, we grab this data from memory. We
 // may or may not actually send it depending on whether we get enough data from disk. SOMEDAY: Only do this if
 // an initial attempt to read from disk results in insufficient data and the required data is no longer in
 // memory SOMEDAY: Should we only send part of the messages we collected, to actually limit the size of the
 // result?

-if (req.onlySpilled) {
+if (reqOnlySpilled) {
 endVersion = logData->persistentDataDurableVersion + 1;
 } else {
-peekMessagesFromMemory(logData, req, messages2, endVersion);
+peekMessagesFromMemory(logData, reqTag, reqBegin, messages2, endVersion);
 }

 RangeResult kvs = wait(self->persistentData->readRange(
-KeyRangeRef(persistTagMessagesKey(logData->logId, req.tag, req.begin),
-persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
+KeyRangeRef(persistTagMessagesKey(logData->logId, reqTag, reqBegin),
+persistTagMessagesKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
 SERVER_KNOBS->DESIRED_TOTAL_BYTES,
 SERVER_KNOBS->DESIRED_TOTAL_BYTES));

-//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().address).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
+//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().address).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());

 for (auto& kv : kvs) {
 auto ver = decodeTagMessagesKey(kv.key);
@@ -1377,20 +1389,20 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 messages.serializeBytes(messages2.toValue());
 }
 } else {
-peekMessagesFromMemory(logData, req, messages, endVersion);
-//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().address).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
+peekMessagesFromMemory(logData, reqTag, reqBegin, messages, endVersion);
+//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().address).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
 }

 TLogPeekReply reply;
 reply.maxKnownVersion = logData->version.get();
 reply.minKnownCommittedVersion = logData->minKnownCommittedVersion;
-reply.messages = messages.toValue();
+reply.messages = StringRef(reply.arena, messages.toValue());
 reply.end = endVersion;
 reply.onlySpilled = onlySpilled;

-//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("EndVer", reply.end).detail("MsgBytes", reply.messages.expectedSize()).detail("ForAddress", req.reply.getEndpoint().address);
+//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("EndVer", reply.end).detail("MsgBytes", reply.messages.expectedSize()).detail("ForAddress", replyPromise.getEndpoint().address);

-if (req.sequence.present()) {
+if (reqSequence.present()) {
 auto& trackerData = logData->peekTracker[peekId];
 trackerData.lastUpdate = now();

@@ -1414,7 +1426,7 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

 auto& sequenceData = trackerData.sequence_version[sequence + 1];
 if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
-req.reply.sendError(operation_obsolete());
+replyPromise.sendError(operation_obsolete());
 if (!sequenceData.isSet())
 sequenceData.sendError(operation_obsolete());
 return Void();
@@ -1423,19 +1435,59 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 trackerData.duplicatePeeks++;
 if (sequenceData.getFuture().get().first != reply.end) {
 TEST(true); // tlog peek second attempt ended at a different version (2)
-req.reply.sendError(operation_obsolete());
+replyPromise.sendError(operation_obsolete());
 return Void();
 }
 } else {
 sequenceData.send(std::make_pair(reply.end, reply.onlySpilled));
 }
-reply.begin = req.begin;
+reply.begin = reqBegin;
 }

-req.reply.send(reply);
+replyPromise.send(reply);
 return Void();
 }

+// This actor keep pushing TLogPeekStreamReply until it's removed from the cluster or should recover
+ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Reference<LogData> logData) {
+self->activePeekStreams++;
+
+state Version begin = req.begin;
+state bool onlySpilled = false;
+req.reply.setByteLimit(std::min(SERVER_KNOBS->MAXIMUM_PEEK_BYTES, req.limitBytes));
+loop {
+state TLogPeekStreamReply reply;
+state Promise<TLogPeekReply> promise;
+state Future<TLogPeekReply> future(promise.getFuture());
+try {
+wait(req.reply.onReady() && store(reply.rep, future) &&
+tLogPeekMessages(promise, self, logData, begin, req.tag, req.returnIfBlocked, onlySpilled));
+
+reply.rep.begin = begin;
+req.reply.send(reply);
+begin = reply.rep.end;
+onlySpilled = reply.rep.onlySpilled;
+if (reply.rep.end > logData->version.get()) {
+wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
+} else {
+wait(delay(0, g_network->getCurrentTask()));
+}
+} catch (Error& e) {
+self->activePeekStreams--;
+TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
+.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
+.error(e, true);
+
+if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
+req.reply.sendError(e);
+return Void();
+} else {
+throw;
+}
+}
+}
+}
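The reqSequence bookkeeping repeated through the hunks above implements pipelined ("parallel") peeks: a client issues several numbered requests under one peekId, request n+1 may only begin at the version where request n ended, and a request that arrives out of order or after its predecessor was cleaned up is answered with operation_obsolete. The sequence_version map of promises is the handoff table. A synchronous, single-threaded model of that table could look like the sketch below; the real code coordinates with futures instead, and PeekTracker here is a hypothetical stand-in.

#include <algorithm>
#include <cstdint>
#include <map>
#include <optional>
#include <stdexcept>

using Version = int64_t;

// Hypothetical model of trackerData.sequence_version: peek #n publishes the
// version where it ended, and peek #n+1 must begin there.
struct PeekTracker {
    std::map<int, std::optional<Version>> sequenceEnd; // promise/future pairs in the real code

    // Called when peek #seq starts: returns the begin version it must use.
    Version beginFor(int seq, Version requestedBegin) {
        if (seq == 0)
            return requestedBegin;
        auto it = sequenceEnd.find(seq - 1);
        if (it == sequenceEnd.end() || !it->second)
            throw std::runtime_error("operation_obsolete"); // predecessor missing or cleaned up
        return std::max(*it->second, requestedBegin);
    }

    // Called when peek #seq finishes, having returned data up to endVersion.
    // A retried request that would end at a different version is the
    // "second attempt ended at a different version" case in the hunks above.
    void complete(int seq, Version endVersion) {
        auto& slot = sequenceEnd[seq];
        if (slot && *slot != endVersion)
            throw std::runtime_error("operation_obsolete");
        slot = endVersion;
    }
};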

 ACTOR Future<Void> doQueueCommit(TLogData* self,
 Reference<LogData> logData,
 std::vector<Reference<LogData>> missingFinalCommit) {
@@ -1930,7 +1982,13 @@ ACTOR Future<Void> serveTLogInterface(TLogData* self,
 }
 }
 when(TLogPeekRequest req = waitNext(tli.peekMessages.getFuture())) {
-logData->addActor.send(tLogPeekMessages(self, req, logData));
+logData->addActor.send(tLogPeekMessages(
+req.reply, self, logData, req.begin, req.tag, req.returnIfBlocked, req.onlySpilled, req.sequence));
+}
+when(TLogPeekStreamRequest req = waitNext(tli.peekStreamMessages.getFuture())) {
+TraceEvent(SevDebug, "TLogPeekStream", logData->logId)
+.detail("Token", tli.peekStreamMessages.getEndpoint().token);
+logData->addActor.send(tLogPeekStream(self, req, logData));
 }
 when(TLogPopRequest req = waitNext(tli.popMessages.getFuture())) {
 logData->addActor.send(tLogPop(self, req, logData));
@@ -2327,6 +2385,7 @@ ACTOR Future<Void> restorePersistentState(TLogData* self,
 recruited.initEndpoints();

 DUMPTOKEN(recruited.peekMessages);
+DUMPTOKEN(recruited.peekStreamMessages);
 DUMPTOKEN(recruited.popMessages);
 DUMPTOKEN(recruited.commit);
 DUMPTOKEN(recruited.lock);
@@ -2537,6 +2596,7 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
 recruited.initEndpoints();

 DUMPTOKEN(recruited.peekMessages);
+DUMPTOKEN(recruited.peekStreamMessages);
 DUMPTOKEN(recruited.popMessages);
 DUMPTOKEN(recruited.commit);
 DUMPTOKEN(recruited.lock);
@@ -2729,7 +2789,8 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
 state TLogData self(tlogId, workerID, persistentData, persistentQueue, db, degraded, folder);
 state Future<Void> error = actorCollection(self.sharedActors.getFuture());

-TraceEvent("SharedTlog", tlogId).log();
+TraceEvent("SharedTlog", tlogId).detail("Version", "6.0");

 try {
 if (restoreFromDisk) {
 wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));
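That ends the second file (the 6.0-era TLog, again judging by its SharedTlog Version detail). One behavioral change in its hunks is easy to overlook: reply.messages = messages.toValue() becomes reply.messages = StringRef(reply.arena, messages.toValue()), which copies the serialized bytes into the reply's own arena. In the old request/response flow the reply went out before the local BinaryWriter was destroyed; the streaming caller now holds a TLogPeekReply across further waits, so the reply must own its bytes. A plain-C++ sketch of borrowed versus owned buffers follows; Arena and PeekReply here are hypothetical miniatures, not Flow's types.

#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <vector>

// Hypothetical miniature of the Arena/StringRef ownership rule.
struct Arena { // a bump allocator that the reply object carries with it
    std::vector<std::unique_ptr<char[]>> blocks;
    std::string_view copy(std::string_view src) {
        auto buf = std::make_unique<char[]>(src.size());
        std::memcpy(buf.get(), src.data(), src.size());
        std::string_view out(buf.get(), src.size());
        blocks.push_back(std::move(buf));
        return out; // valid for as long as the arena lives
    }
};

struct PeekReply {
    Arena arena;
    std::string_view messages; // like StringRef: a non-owning view
};

PeekReply makeReply() {
    std::string serialized = "peeked bytes"; // stands in for BinaryWriter::toValue()
    PeekReply reply;
    // reply.messages = serialized;                // would dangle once `serialized` dies
    reply.messages = reply.arena.copy(serialized); // the fixed pattern: copy into the arena
    return reply;
}

int main() {
    PeekReply r = makeReply();
    return r.messages.size() == 12 ? 0 : 1; // the bytes are still valid here
}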
@@ -339,6 +339,7 @@ struct TLogData : NonCopyable {
 int64_t targetVolatileBytes; // The number of bytes of mutations this TLog should hold in memory before spilling.
 int64_t overheadBytesInput;
 int64_t overheadBytesDurable;
+int activePeekStreams = 0;

 WorkerCache<TLogInterface> tlogCache;
 FlowLock peekMemoryLimiter;
@@ -661,6 +662,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
 cc, "QueueDiskBytesTotal", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().total; });
 specialCounter(cc, "PeekMemoryReserved", [tLogData]() { return tLogData->peekMemoryLimiter.activePermits(); });
 specialCounter(cc, "PeekMemoryRequestsStalled", [tLogData]() { return tLogData->peekMemoryLimiter.waiters(); });
+specialCounter(cc, "ActivePeekStreams", [tLogData]() { return tLogData->activePeekStreams; });
 }

 ~LogData() {
@@ -1440,17 +1442,19 @@ ACTOR Future<Void> tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen
 }

 uint64_t PoppedVersionLag = logData->persistentDataDurableVersion - logData->queuePoppedVersion;
-if ( SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE &&
-(logData->queuePoppedVersion > 0) && //avoid generating massive events at beginning
-(tagData->unpoppedRecovered || PoppedVersionLag >= SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { //when recovery or long lag
+if (SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE &&
+(logData->queuePoppedVersion > 0) && // avoid generating massive events at beginning
+(tagData->unpoppedRecovered ||
+PoppedVersionLag >=
+SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { // when recovery or long lag
 TraceEvent("TLogPopDetails", logData->logId)
 .detail("Tag", tagData->tag.toString())
 .detail("UpTo", upTo)
 .detail("PoppedVersionLag", PoppedVersionLag)
 .detail("MinPoppedTag", logData->minPoppedTag.toString())
 .detail("QueuePoppedVersion", logData->queuePoppedVersion)
 .detail("UnpoppedRecovered", tagData->unpoppedRecovered ? "True" : "False")
 .detail("NothingPersistent", tagData->nothingPersistent ? "True" : "False");
 }
 if (upTo > logData->persistentDataDurableVersion)
 wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskPriority::TLogPop));
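The tLogPopCore hunk above is mostly a formatting reflow, but the condition it reformats is worth spelling out: detailed pop tracing fires only when the ENABLE_DETAILED_TLOG_POP_TRACE knob is on, the popped version is past startup, and either recovery left unpopped data behind or the popped-version lag crossed TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE. A sketch of that gating, with hypothetical knob values standing in for the real server knobs:

#include <cstdint>
#include <iostream>

// Hypothetical knobs mirroring ENABLE_DETAILED_TLOG_POP_TRACE and
// TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE.
constexpr bool kEnableDetailedPopTrace = true;
constexpr uint64_t kPoppedLagThreshold = 1000000;

void maybeTracePop(uint64_t durableVersion, uint64_t queuePoppedVersion, bool unpoppedRecovered) {
    const uint64_t poppedVersionLag = durableVersion - queuePoppedVersion;
    if (kEnableDetailedPopTrace &&
        queuePoppedVersion > 0 &&                    // avoid a flood of events at startup
        (unpoppedRecovered ||                        // recovery left data behind, or
         poppedVersionLag >= kPoppedLagThreshold)) { // the pop frontier is far behind
        std::cout << "TLogPopDetails lag=" << poppedVersionLag
                  << " unpoppedRecovered=" << (unpoppedRecovered ? "True" : "False") << "\n";
    }
}

int main() {
    maybeTracePop(5000000, 1000, false); // large lag: traces
    maybeTracePop(2000, 1000, false);    // small lag, no recovery debt: silent
}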
@@ -1487,15 +1491,16 @@ ACTOR Future<Void> tLogPop(TLogData* self, TLogPopRequest req, Reference<LogData
 }

 void peekMessagesFromMemory(Reference<LogData> self,
-TLogPeekRequest const& req,
+Tag tag,
+Version begin,
 BinaryWriter& messages,
 Version& endVersion) {
 ASSERT(!messages.getLength());

-auto& deque = getVersionMessages(self, req.tag);
+auto& deque = getVersionMessages(self, tag);
 //TraceEvent("TLogPeekMem", self->dbgid).detail("Tag", req.tag1).detail("PDS", self->persistentDataSequence).detail("PDDS", self->persistentDataDurableSequence).detail("Oldest", map1.empty() ? 0 : map1.begin()->key ).detail("OldestMsgCount", map1.empty() ? 0 : map1.begin()->value.size());

-Version begin = std::max(req.begin, self->persistentDataDurableVersion + 1);
+begin = std::max(begin, self->persistentDataDurableVersion + 1);
 auto it = std::lower_bound(deque.begin(),
 deque.end(),
 std::make_pair(begin, LengthPrefixedStringRef()),
@@ -1540,29 +1545,38 @@ ACTOR Future<std::vector<StringRef>> parseMessagesForTag(StringRef commitBlob, T
 return relevantMessages;
 }

-ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Reference<LogData> logData) {
+// Common logics to peek TLog and create TLogPeekReply that serves both streaming peek or normal peek request
+ACTOR template <typename PromiseType>
+Future<Void> tLogPeekMessages(PromiseType replyPromise,
+TLogData* self,
+Reference<LogData> logData,
+Version reqBegin,
+Tag reqTag,
+bool reqReturnIfBlocked = false,
+bool reqOnlySpilled = false,
+Optional<std::pair<UID, int>> reqSequence = Optional<std::pair<UID, int>>()) {
 state BinaryWriter messages(Unversioned());
 state BinaryWriter messages2(Unversioned());
 state int sequence = -1;
 state UID peekId;
 state double queueStart = now();

-if (req.tag.locality == tagLocalityTxs && req.tag.id >= logData->txsTags && logData->txsTags > 0) {
-req.tag.id = req.tag.id % logData->txsTags;
+if (reqTag.locality == tagLocalityTxs && reqTag.id >= logData->txsTags && logData->txsTags > 0) {
+reqTag.id = reqTag.id % logData->txsTags;
 }

-if (req.sequence.present()) {
+if (reqSequence.present()) {
 try {
-peekId = req.sequence.get().first;
-sequence = req.sequence.get().second;
+peekId = reqSequence.get().first;
+sequence = reqSequence.get().second;
 if (sequence >= SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS &&
 logData->peekTracker.find(peekId) == logData->peekTracker.end()) {
 throw operation_obsolete();
 }
 auto& trackerData = logData->peekTracker[peekId];
 if (sequence == 0 && trackerData.sequence_version.find(0) == trackerData.sequence_version.end()) {
-trackerData.tag = req.tag;
-trackerData.sequence_version[0].send(std::make_pair(req.begin, req.onlySpilled));
+trackerData.tag = reqTag;
+trackerData.sequence_version[0].send(std::make_pair(reqBegin, reqOnlySpilled));
 }
 auto seqBegin = trackerData.sequence_version.begin();
 // The peek cursor and this comparison need to agree about the maximum number of in-flight requests.
@@ -1589,12 +1603,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 }
 trackerData.lastUpdate = now();
 std::pair<Version, bool> prevPeekData = wait(fPrevPeekData);
-req.begin = std::max(prevPeekData.first, req.begin);
-req.onlySpilled = prevPeekData.second;
+reqBegin = std::max(prevPeekData.first, reqBegin);
+reqOnlySpilled = prevPeekData.second;
 wait(yield());
 } catch (Error& e) {
 if (e.code() == error_code_timed_out || e.code() == error_code_operation_obsolete) {
-req.reply.sendError(e);
+replyPromise.sendError(e);
 return Void();
 } else {
 throw;
@@ -1604,32 +1618,32 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

 state double blockStart = now();

-if (req.returnIfBlocked && logData->version.get() < req.begin) {
-req.reply.sendError(end_of_stream());
-if (req.sequence.present()) {
+if (reqReturnIfBlocked && logData->version.get() < reqBegin) {
+replyPromise.sendError(end_of_stream());
+if (reqSequence.present()) {
 auto& trackerData = logData->peekTracker[peekId];
 auto& sequenceData = trackerData.sequence_version[sequence + 1];
 if (!sequenceData.isSet()) {
-sequenceData.send(std::make_pair(req.begin, req.onlySpilled));
+sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled));
 }
 }
 return Void();
 }

-//TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
+//TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
 // Wait until we have something to return that the caller doesn't already have
-if (logData->version.get() < req.begin) {
-wait(logData->version.whenAtLeast(req.begin));
+if (logData->version.get() < reqBegin) {
+wait(logData->version.whenAtLeast(reqBegin));
 wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
 }

-if (req.tag.locality == tagLocalityLogRouter) {
+if (reqTag.locality == tagLocalityLogRouter) {
 wait(self->concurrentLogRouterReads.take());
 state FlowLock::Releaser globalReleaser(self->concurrentLogRouterReads);
 wait(delay(0.0, TaskPriority::Low));
 }

-if (req.begin <= logData->persistentDataDurableVersion && req.tag.locality != tagLocalityTxs && req.tag != txsTag) {
+if (reqBegin <= logData->persistentDataDurableVersion && reqTag.locality != tagLocalityTxs && reqTag != txsTag) {
 // Reading spilled data will almost always imply that the storage server is >5s behind the rest
 // of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
 // slightly faster over keeping the rest of the cluster operating normally.
@@ -1640,8 +1654,8 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

 state double workStart = now();

-Version poppedVer = poppedVersion(logData, req.tag);
-if (poppedVer > req.begin) {
+Version poppedVer = poppedVersion(logData, reqTag);
+if (poppedVer > reqBegin) {
 TLogPeekReply rep;
 rep.maxKnownVersion = logData->version.get();
 rep.minKnownCommittedVersion = logData->minKnownCommittedVersion;
@@ -1649,12 +1663,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 rep.end = poppedVer;
 rep.onlySpilled = false;

-if (req.sequence.present()) {
+if (reqSequence.present()) {
 auto& trackerData = logData->peekTracker[peekId];
 auto& sequenceData = trackerData.sequence_version[sequence + 1];
 trackerData.lastUpdate = now();
 if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
-req.reply.sendError(operation_obsolete());
+replyPromise.sendError(operation_obsolete());
 if (!sequenceData.isSet())
 sequenceData.sendError(operation_obsolete());
 return Void();
@@ -1662,16 +1676,16 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 if (sequenceData.isSet()) {
 if (sequenceData.getFuture().get().first != rep.end) {
 TEST(true); // tlog peek second attempt ended at a different version
-req.reply.sendError(operation_obsolete());
+replyPromise.sendError(operation_obsolete());
 return Void();
 }
 } else {
 sequenceData.send(std::make_pair(rep.end, rep.onlySpilled));
 }
-rep.begin = req.begin;
+rep.begin = reqBegin;
 }

-req.reply.send(rep);
+replyPromise.send(rep);
 return Void();
 }

@@ -1679,24 +1693,24 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 state bool onlySpilled = false;

 // grab messages from disk
-//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
-if (req.begin <= logData->persistentDataDurableVersion) {
+//TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
+if (reqBegin <= logData->persistentDataDurableVersion) {
 // Just in case the durable version changes while we are waiting for the read, we grab this data from memory. We
 // may or may not actually send it depending on whether we get enough data from disk. SOMEDAY: Only do this if
 // an initial attempt to read from disk results in insufficient data and the required data is no longer in
 // memory SOMEDAY: Should we only send part of the messages we collected, to actually limit the size of the
 // result?

-if (req.onlySpilled) {
+if (reqOnlySpilled) {
 endVersion = logData->persistentDataDurableVersion + 1;
 } else {
-peekMessagesFromMemory(logData, req, messages2, endVersion);
+peekMessagesFromMemory(logData, reqTag, reqBegin, messages2, endVersion);
 }

-if (req.tag.locality == tagLocalityTxs || req.tag == txsTag) {
+if (reqTag.locality == tagLocalityTxs || reqTag == txsTag) {
 RangeResult kvs = wait(self->persistentData->readRange(
-KeyRangeRef(persistTagMessagesKey(logData->logId, req.tag, req.begin),
-persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
+KeyRangeRef(persistTagMessagesKey(logData->logId, reqTag, reqBegin),
+persistTagMessagesKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
 SERVER_KNOBS->DESIRED_TOTAL_BYTES,
 SERVER_KNOBS->DESIRED_TOTAL_BYTES));

@@ -1716,11 +1730,11 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
 // FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
 RangeResult kvrefs = wait(self->persistentData->readRange(
 KeyRangeRef(
-persistTagMessageRefsKey(logData->logId, req.tag, req.begin),
-persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
+persistTagMessageRefsKey(logData->logId, reqTag, reqBegin),
+persistTagMessageRefsKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
 SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK + 1));

-//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
+//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
|
||||||
|
|
||||||
state std::vector<std::pair<IDiskQueue::location, IDiskQueue::location>> commitLocations;
|
state std::vector<std::pair<IDiskQueue::location, IDiskQueue::location>> commitLocations;
|
||||||
state bool earlyEnd = false;
|
state bool earlyEnd = false;
|
||||||
|
@ -1737,7 +1751,7 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
earlyEnd = true;
|
earlyEnd = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (sd.version >= req.begin) {
|
if (sd.version >= reqBegin) {
|
||||||
firstVersion = std::min(firstVersion, sd.version);
|
firstVersion = std::min(firstVersion, sd.version);
|
||||||
const IDiskQueue::location end = sd.start.lo + sd.length;
|
const IDiskQueue::location end = sd.start.lo + sd.length;
|
||||||
commitLocations.emplace_back(sd.start, end);
|
commitLocations.emplace_back(sd.start, end);
|
||||||
|
@ -1779,7 +1793,7 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
messages << VERSION_HEADER << entry.version;
|
messages << VERSION_HEADER << entry.version;
|
||||||
|
|
||||||
std::vector<StringRef> rawMessages =
|
std::vector<StringRef> rawMessages =
|
||||||
wait(parseMessagesForTag(entry.messages, req.tag, logData->logRouterTags));
|
wait(parseMessagesForTag(entry.messages, reqTag, logData->logRouterTags));
|
||||||
for (const StringRef& msg : rawMessages) {
|
for (const StringRef& msg : rawMessages) {
|
||||||
messages.serializeBytes(msg);
|
messages.serializeBytes(msg);
|
||||||
}
|
}
|
||||||
|
@ -1799,25 +1813,25 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (req.onlySpilled) {
|
if (reqOnlySpilled) {
|
||||||
endVersion = logData->persistentDataDurableVersion + 1;
|
endVersion = logData->persistentDataDurableVersion + 1;
|
||||||
} else {
|
} else {
|
||||||
peekMessagesFromMemory(logData, req, messages, endVersion);
|
peekMessagesFromMemory(logData, reqTag, reqBegin, messages, endVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
|
//TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
|
||||||
}
|
}
|
||||||
|
|
||||||
TLogPeekReply reply;
|
TLogPeekReply reply;
|
||||||
reply.maxKnownVersion = logData->version.get();
|
reply.maxKnownVersion = logData->version.get();
|
||||||
reply.minKnownCommittedVersion = logData->minKnownCommittedVersion;
|
reply.minKnownCommittedVersion = logData->minKnownCommittedVersion;
|
||||||
reply.messages = messages.toValue();
|
reply.messages = StringRef(reply.arena, messages.toValue());
|
||||||
reply.end = endVersion;
|
reply.end = endVersion;
|
||||||
reply.onlySpilled = onlySpilled;
|
reply.onlySpilled = onlySpilled;
|
||||||
|
|
||||||
//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("EndVer", reply.end).detail("MsgBytes", reply.messages.expectedSize()).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress());
|
//TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("EndVer", reply.end).detail("MsgBytes", reply.messages.expectedSize()).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress());
|
||||||
|
|
||||||
if (req.sequence.present()) {
|
if (reqSequence.present()) {
|
||||||
auto& trackerData = logData->peekTracker[peekId];
|
auto& trackerData = logData->peekTracker[peekId];
|
||||||
trackerData.lastUpdate = now();
|
trackerData.lastUpdate = now();
|
||||||
|
|
||||||
|
@ -1841,7 +1855,7 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
|
|
||||||
auto& sequenceData = trackerData.sequence_version[sequence + 1];
|
auto& sequenceData = trackerData.sequence_version[sequence + 1];
|
||||||
if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
|
if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
|
||||||
req.reply.sendError(operation_obsolete());
|
replyPromise.sendError(operation_obsolete());
|
||||||
if (!sequenceData.isSet())
|
if (!sequenceData.isSet())
|
||||||
sequenceData.sendError(operation_obsolete());
|
sequenceData.sendError(operation_obsolete());
|
||||||
return Void();
|
return Void();
|
||||||
|
@ -1850,19 +1864,59 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
|
||||||
trackerData.duplicatePeeks++;
|
trackerData.duplicatePeeks++;
|
||||||
if (sequenceData.getFuture().get().first != reply.end) {
|
if (sequenceData.getFuture().get().first != reply.end) {
|
||||||
TEST(true); // tlog peek second attempt ended at a different version (2)
|
TEST(true); // tlog peek second attempt ended at a different version (2)
|
||||||
req.reply.sendError(operation_obsolete());
|
replyPromise.sendError(operation_obsolete());
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
sequenceData.send(std::make_pair(reply.end, reply.onlySpilled));
|
sequenceData.send(std::make_pair(reply.end, reply.onlySpilled));
|
||||||
}
|
}
|
||||||
reply.begin = req.begin;
|
reply.begin = reqBegin;
|
||||||
}
|
}
|
||||||
|
|
||||||
req.reply.send(reply);
|
replyPromise.send(reply);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This actor keep pushing TLogPeekStreamReply until it's removed from the cluster or should recover
|
||||||
|
ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Reference<LogData> logData) {
|
||||||
|
self->activePeekStreams++;
|
||||||
|
|
||||||
|
state Version begin = req.begin;
|
||||||
|
state bool onlySpilled = false;
|
||||||
|
req.reply.setByteLimit(std::min(SERVER_KNOBS->MAXIMUM_PEEK_BYTES, req.limitBytes));
|
||||||
|
loop {
|
||||||
|
state TLogPeekStreamReply reply;
|
||||||
|
state Promise<TLogPeekReply> promise;
|
||||||
|
state Future<TLogPeekReply> future(promise.getFuture());
|
||||||
|
try {
|
||||||
|
wait(req.reply.onReady() && store(reply.rep, future) &&
|
||||||
|
tLogPeekMessages(promise, self, logData, begin, req.tag, req.returnIfBlocked, onlySpilled));
|
||||||
|
|
||||||
|
reply.rep.begin = begin;
|
||||||
|
req.reply.send(reply);
|
||||||
|
begin = reply.rep.end;
|
||||||
|
onlySpilled = reply.rep.onlySpilled;
|
||||||
|
if (reply.rep.end > logData->version.get()) {
|
||||||
|
wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
|
||||||
|
} else {
|
||||||
|
wait(delay(0, g_network->getCurrentTask()));
|
||||||
|
}
|
||||||
|
} catch (Error& e) {
|
||||||
|
self->activePeekStreams--;
|
||||||
|
TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
|
||||||
|
.detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
|
||||||
|
.error(e, true);
|
||||||
|
|
||||||
|
if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
|
||||||
|
req.reply.sendError(e);
|
||||||
|
return Void();
|
||||||
|
} else {
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
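
Note for reviewers: the wait() in the streaming loop above joins three futures with operator&&; store() is the Flow helper that assigns a future's value into a variable, so futures with different payload types can be combined into one Future<Void>. Below is a minimal sketch of the same pattern, an illustration only and not code from this change; it assumes just the flow.h primitives (delay, store, ASSERT) and the actor compiler:

// Sketch: store(out, f) completes when f does, assigning f's value to out
// and yielding Future<Void>, so it composes with other futures via &&.
ACTOR Future<Void> joinExample() {
    state int result = 0;
    wait(delay(0.1) && store(result, Future<int>(42)));
    ASSERT(result == 42); // both sides are ready here
    return Void();
}
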
 ACTOR Future<Void> watchDegraded(TLogData* self) {
     if (g_network->isSimulated() && g_simulator.speedUpSimulation) {
         return Void();
@@ -2373,7 +2427,13 @@ ACTOR Future<Void> serveTLogInterface(TLogData* self,
             }
         }
         when(TLogPeekRequest req = waitNext(tli.peekMessages.getFuture())) {
-            logData->addActor.send(tLogPeekMessages(self, req, logData));
+            logData->addActor.send(tLogPeekMessages(
+                req.reply, self, logData, req.begin, req.tag, req.returnIfBlocked, req.onlySpilled, req.sequence));
+        }
+        when(TLogPeekStreamRequest req = waitNext(tli.peekStreamMessages.getFuture())) {
+            TraceEvent(SevDebug, "TLogPeekStream", logData->logId)
+                .detail("Token", tli.peekStreamMessages.getEndpoint().token);
+            logData->addActor.send(tLogPeekStream(self, req, logData));
         }
         when(TLogPopRequest req = waitNext(tli.popMessages.getFuture())) {
             logData->addActor.send(tLogPop(self, req, logData));
@@ -2788,6 +2848,7 @@ ACTOR Future<Void> restorePersistentState(TLogData* self,
     recruited.initEndpoints();

     DUMPTOKEN(recruited.peekMessages);
+    DUMPTOKEN(recruited.peekStreamMessages);
     DUMPTOKEN(recruited.popMessages);
     DUMPTOKEN(recruited.commit);
     DUMPTOKEN(recruited.lock);
@@ -2826,9 +2887,9 @@ ACTOR Future<Void> restorePersistentState(TLogData* self,
     logsByVersion.emplace_back(ver, id1);

     TraceEvent("TLogPersistentStateRestore", self->dbgid)
         .detail("LogId", logData->logId)
         .detail("Ver", ver)
         .detail("RecoveryCount", logData->recoveryCount);
     // Restore popped keys. Pop operations that took place after the last (committed) updatePersistentDataVersion
     // might be lost, but that is fine because we will get the corresponding data back, too.
     tagKeys = prefixRange(rawId.withPrefix(persistTagPoppedKeys.begin));
@@ -3019,6 +3080,7 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
     recruited.initEndpoints();

     DUMPTOKEN(recruited.peekMessages);
+    DUMPTOKEN(recruited.peekStreamMessages);
     DUMPTOKEN(recruited.popMessages);
     DUMPTOKEN(recruited.commit);
     DUMPTOKEN(recruited.lock);
@@ -3218,7 +3280,8 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
     state TLogData self(tlogId, workerID, persistentData, persistentQueue, db, degraded, folder);
     state Future<Void> error = actorCollection(self.sharedActors.getFuture());

-    TraceEvent("SharedTlog", tlogId).log();
+    TraceEvent("SharedTlog", tlogId).detail("Version", "6.2");

     try {
         if (restoreFromDisk) {
             wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));
@@ -637,13 +637,12 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
     // In a simulated environment, wait 5 seconds so that workers can move to their optimal locations
     if (g_network->isSimulated())
         wait(delay(5.0));

     // The quiet database check (which runs at the end of every test) will always time out due to active data movement.
     // To get around this, quiet Database will disable the perpetual wiggle in the setup phase.
     printf("Set perpetual_storage_wiggle=0 ...\n");
     wait(setPerpetualStorageWiggle(cx, false, LockAware::True));
     printf("Set perpetual_storage_wiggle=0 Done.\n");

     // Require 3 consecutive successful quiet database checks spaced 2 second apart
     state int numSuccesses = 0;
@@ -23,8 +23,9 @@
 #include "fdbrpc/FailureMonitor.h"
 #include "fdbrpc/Smoother.h"
 #include "fdbrpc/simulator.h"
+#include "fdbclient/DatabaseContext.h"
 #include "fdbclient/ReadYourWrites.h"
-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "fdbserver/Knobs.h"
 #include "fdbserver/DataDistribution.actor.h"
 #include "fdbserver/RatekeeperInterface.h"
@@ -527,6 +528,9 @@ struct RatekeeperLimits {
         context(context) {}
 };

+namespace RatekeeperActorCpp {
+
+// Differentiate from GrvProxyInfo in DatabaseContext.h
 struct GrvProxyInfo {
     int64_t totalTransactions;
     int64_t batchTransactions;
@@ -540,6 +544,8 @@ struct GrvProxyInfo {
     }
 };

+} // namespace RatekeeperActorCpp
+
 struct RatekeeperData {
     UID id;
     Database db;
@@ -547,7 +553,7 @@ struct RatekeeperData {
     Map<UID, StorageQueueInfo> storageQueueInfo;
     Map<UID, TLogQueueInfo> tlogQueueInfo;

-    std::map<UID, GrvProxyInfo> grvProxyInfo;
+    std::map<UID, RatekeeperActorCpp::GrvProxyInfo> grvProxyInfo;
     Smoother smoothReleasedTransactions, smoothBatchReleasedTransactions, smoothTotalDurableBytes;
     HealthMetrics healthMetrics;
     DatabaseConfiguration configuration;
@@ -595,8 +601,8 @@ struct RatekeeperData {
         SERVER_KNOBS->MAX_TL_SS_VERSION_DIFFERENCE_BATCH,
         SERVER_KNOBS->TARGET_DURABILITY_LAG_VERSIONS_BATCH),
       autoThrottlingEnabled(false) {
-        expiredTagThrottleCleanup =
-            recurring([this]() { ThrottleApi::expire(this->db); }, SERVER_KNOBS->TAG_THROTTLE_EXPIRED_CLEANUP_INTERVAL);
+        expiredTagThrottleCleanup = recurring([this]() { ThrottleApi::expire(this->db.getReference()); },
+                                              SERVER_KNOBS->TAG_THROTTLE_EXPIRED_CLEANUP_INTERVAL);
     }
 };

@@ -942,7 +948,8 @@ void tryAutoThrottleTag(RatekeeperData* self,
     TagSet tags;
     tags.addTag(tag);

-    self->addActor.send(ThrottleApi::throttleTags(self->db,
+    Reference<DatabaseContext> db = Reference<DatabaseContext>::addRef(self->db.getPtr());
+    self->addActor.send(ThrottleApi::throttleTags(db,
                                                   tags,
                                                   clientRate.get(),
                                                   SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION,
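
Note for reviewers: ThrottleApi here now takes a Reference<DatabaseContext> rather than a Database, so the call site above builds one with Reference<T>::addRef, which turns a raw pointer that is already owned elsewhere into an additional owning reference by bumping the reference count. A minimal sketch of that idiom under the flow/FastRef.h primitives, illustration only (Counted is a made-up type, not from this change):

// Sketch: Reference<T>::addRef(T*) increments the count and returns a new
// owning Reference<T>; the original owner keeps its reference.
struct Counted : ReferenceCounted<Counted> {
    int value = 0;
};

void addRefExample() {
    Reference<Counted> owner(new Counted()); // refcount == 1
    Reference<Counted> alias = Reference<Counted>::addRef(owner.getPtr()); // refcount == 2
    alias->value = 7;
    ASSERT(owner->value == 7); // both references point at the same object
}
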
@@ -19,6 +19,11 @@ extern "C" void stackSignalHandler(int sig) {
     }
 }

+#ifdef _WIN32
+#define SIGUSR1 10
+#define SIGUSR2 12
+#endif
+
 void setupStackSignal() {
     std::signal(SIGUSR1, &stackSignalHandler);
 }
@@ -38,6 +38,8 @@ struct TLogInterface {
     UID sharedTLogID;

     RequestStream<struct TLogPeekRequest> peekMessages;
+    RequestStream<struct TLogPeekStreamRequest>
+        peekStreamMessages; // requests to establish a peek stream with the TLog server
     RequestStream<struct TLogPopRequest> popMessages;

     RequestStream<struct TLogCommitRequest> commit;
@@ -68,6 +70,7 @@ struct TLogInterface {
     NetworkAddressList addresses() const { return peekMessages.getEndpoint().addresses; }

     void initEndpoints() {
+        // NOTE: the adding order should be the same as the hardcoded indices in serialize()
         std::vector<std::pair<FlowReceiver*, TaskPriority>> streams;
         streams.push_back(peekMessages.getReceiver(TaskPriority::TLogPeek));
         streams.push_back(popMessages.getReceiver(TaskPriority::TLogPop));
@@ -80,6 +83,7 @@ struct TLogInterface {
         streams.push_back(disablePopRequest.getReceiver());
         streams.push_back(enablePopRequest.getReceiver());
         streams.push_back(snapRequest.getReceiver());
+        streams.push_back(peekStreamMessages.getReceiver(TaskPriority::TLogPeek));
         FlowTransport::transport().addEndpoints(streams);
     }

@@ -106,6 +110,8 @@ struct TLogInterface {
         enablePopRequest =
             RequestStream<struct TLogEnablePopRequest>(peekMessages.getEndpoint().getAdjustedEndpoint(9));
         snapRequest = RequestStream<struct TLogSnapRequest>(peekMessages.getEndpoint().getAdjustedEndpoint(10));
+        peekStreamMessages =
+            RequestStream<struct TLogPeekStreamRequest>(peekMessages.getEndpoint().getAdjustedEndpoint(11));
     }
 }
 };
@@ -209,6 +215,40 @@ struct TLogPeekRequest {
     }
 };

+struct TLogPeekStreamReply : public ReplyPromiseStreamReply {
+    constexpr static FileIdentifier file_identifier = 10072848;
+    TLogPeekReply rep;
+
+    TLogPeekStreamReply() = default;
+    explicit TLogPeekStreamReply(const TLogPeekReply& rep) : rep(rep) {}
+
+    int expectedSize() const { return rep.messages.expectedSize() + sizeof(TLogPeekStreamReply); }
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, ReplyPromiseStreamReply::acknowledgeToken, rep);
+    }
+};
+
+struct TLogPeekStreamRequest {
+    constexpr static FileIdentifier file_identifier = 10072821;
+    Arena arena;
+    Version begin;
+    Tag tag;
+    bool returnIfBlocked;
+    int limitBytes;
+    ReplyPromiseStream<TLogPeekStreamReply> reply;
+
+    TLogPeekStreamRequest() {}
+    TLogPeekStreamRequest(Version version, Tag tag, bool returnIfBlocked, int limitBytes)
+      : begin(version), tag(tag), returnIfBlocked(returnIfBlocked), limitBytes(limitBytes) {}
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, arena, begin, tag, returnIfBlocked, limitBytes, reply);
+    }
+};
+
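Note for reviewers: a peer drives this request through the ReplyPromiseStream machinery rather than a plain ReplyPromise, receiving a sequence of TLogPeekStreamReply values until the server ends the stream. A minimal consumer sketch follows; it is an illustration only, not code from this change: consumePeekStream is hypothetical, it assumes RequestStream::getReplyStream, and it elides the end_of_stream/operation_obsolete handling a real peek cursor needs.

// Sketch: consume a TLog peek stream from the client side.
ACTOR Future<Void> consumePeekStream(TLogInterface tlog, Tag tag, Version begin) {
    state ReplyPromiseStream<TLogPeekStreamReply> replies = tlog.peekStreamMessages.getReplyStream(
        TLogPeekStreamRequest(begin, tag, false, 1 << 20 /* hypothetical byte limit */));
    loop {
        TLogPeekStreamReply r = waitNext(replies.getFuture());
        begin = r.rep.end; // each reply carries an ordinary TLogPeekReply
    }
}
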
 struct TLogPopRequest {
     constexpr static FileIdentifier file_identifier = 5556423;
     Arena arena;

@@ -341,6 +341,7 @@ struct TLogData : NonCopyable {
     int64_t targetVolatileBytes; // The number of bytes of mutations this TLog should hold in memory before spilling.
     int64_t overheadBytesInput;
     int64_t overheadBytesDurable;
+    int activePeekStreams = 0;

     WorkerCache<TLogInterface> tlogCache;
     FlowLock peekMemoryLimiter;
@@ -667,6 +668,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
         specialCounter(cc, "PeekMemoryReserved", [tLogData]() { return tLogData->peekMemoryLimiter.activePermits(); });
         specialCounter(cc, "PeekMemoryRequestsStalled", [tLogData]() { return tLogData->peekMemoryLimiter.waiters(); });
         specialCounter(cc, "Generation", [this]() { return this->recoveryCount; });
+        specialCounter(cc, "ActivePeekStreams", [tLogData]() { return tLogData->activePeekStreams; });
     }

     ~LogData() {
@@ -1166,17 +1168,19 @@ ACTOR Future<Void> tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen
     }

     uint64_t PoppedVersionLag = logData->persistentDataDurableVersion - logData->queuePoppedVersion;
-    if ( SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE &&
-        (logData->queuePoppedVersion > 0) && //avoid generating massive events at beginning
-        (tagData->unpoppedRecovered || PoppedVersionLag >= SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { //when recovery or long lag
+    if (SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE &&
+        (logData->queuePoppedVersion > 0) && // avoid generating massive events at beginning
+        (tagData->unpoppedRecovered ||
+         PoppedVersionLag >=
+             SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { // when recovery or long lag
         TraceEvent("TLogPopDetails", logData->logId)
             .detail("Tag", tagData->tag.toString())
             .detail("UpTo", upTo)
             .detail("PoppedVersionLag", PoppedVersionLag)
             .detail("MinPoppedTag", logData->minPoppedTag.toString())
             .detail("QueuePoppedVersion", logData->queuePoppedVersion)
             .detail("UnpoppedRecovered", tagData->unpoppedRecovered ? "True" : "False")
             .detail("NothingPersistent", tagData->nothingPersistent ? "True" : "False");
     }
     if (upTo > logData->persistentDataDurableVersion)
         wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskPriority::TLogPop));
@@ -1518,15 +1522,16 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>>& getVersionMessages(Refe
 };

 void peekMessagesFromMemory(Reference<LogData> self,
-                            TLogPeekRequest const& req,
+                            Tag tag,
+                            Version begin,
                             BinaryWriter& messages,
                             Version& endVersion) {
     ASSERT(!messages.getLength());

-    auto& deque = getVersionMessages(self, req.tag);
+    auto& deque = getVersionMessages(self, tag);
     //TraceEvent("TLogPeekMem", self->dbgid).detail("Tag", req.tag1).detail("PDS", self->persistentDataSequence).detail("PDDS", self->persistentDataDurableSequence).detail("Oldest", map1.empty() ? 0 : map1.begin()->key ).detail("OldestMsgCount", map1.empty() ? 0 : map1.begin()->value.size());

-    Version begin = std::max(req.begin, self->persistentDataDurableVersion + 1);
+    begin = std::max(begin, self->persistentDataDurableVersion + 1);
     auto it = std::lower_bound(deque.begin(),
                                deque.end(),
                                std::make_pair(begin, LengthPrefixedStringRef()),
@@ -1552,7 +1557,7 @@ void peekMessagesFromMemory(Reference<LogData> self,
         void* data = messages.getData();
         DEBUG_TAGS_AND_MESSAGE(
             "TLogPeek", currentVersion, StringRef((uint8_t*)data + offset, messages.getLength() - offset), self->logId)
-            .detail("PeekTag", req.tag);
+            .detail("PeekTag", tag);
     }
 }

@@ -1578,29 +1583,38 @@ ACTOR Future<std::vector<StringRef>> parseMessagesForTag(StringRef commitBlob, T
     return relevantMessages;
 }

-ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Reference<LogData> logData) {
+// Common logics to peek TLog and create TLogPeekReply that serves both streaming peek or normal peek request
+ACTOR template <typename PromiseType>
+Future<Void> tLogPeekMessages(PromiseType replyPromise,
+                              TLogData* self,
+                              Reference<LogData> logData,
+                              Version reqBegin,
+                              Tag reqTag,
+                              bool reqReturnIfBlocked = false,
+                              bool reqOnlySpilled = false,
+                              Optional<std::pair<UID, int>> reqSequence = Optional<std::pair<UID, int>>()) {
     state BinaryWriter messages(Unversioned());
     state BinaryWriter messages2(Unversioned());
     state int sequence = -1;
     state UID peekId;
     state double queueStart = now();

-    if (req.tag.locality == tagLocalityTxs && req.tag.id >= logData->txsTags && logData->txsTags > 0) {
-        req.tag.id = req.tag.id % logData->txsTags;
+    if (reqTag.locality == tagLocalityTxs && reqTag.id >= logData->txsTags && logData->txsTags > 0) {
+        reqTag.id = reqTag.id % logData->txsTags;
     }

-    if (req.sequence.present()) {
+    if (reqSequence.present()) {
         try {
-            peekId = req.sequence.get().first;
-            sequence = req.sequence.get().second;
+            peekId = reqSequence.get().first;
+            sequence = reqSequence.get().second;
             if (sequence >= SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS &&
                 logData->peekTracker.find(peekId) == logData->peekTracker.end()) {
                 throw operation_obsolete();
             }
             auto& trackerData = logData->peekTracker[peekId];
             if (sequence == 0 && trackerData.sequence_version.find(0) == trackerData.sequence_version.end()) {
-                trackerData.tag = req.tag;
-                trackerData.sequence_version[0].send(std::make_pair(req.begin, req.onlySpilled));
+                trackerData.tag = reqTag;
+                trackerData.sequence_version[0].send(std::make_pair(reqBegin, reqOnlySpilled));
             }
             auto seqBegin = trackerData.sequence_version.begin();
             // The peek cursor and this comparison need to agree about the maximum number of in-flight requests.
@@ -1627,12 +1641,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
             }
             trackerData.lastUpdate = now();
             std::pair<Version, bool> prevPeekData = wait(fPrevPeekData);
-            req.begin = std::max(prevPeekData.first, req.begin);
-            req.onlySpilled = prevPeekData.second;
+            reqBegin = std::max(prevPeekData.first, reqBegin);
+            reqOnlySpilled = prevPeekData.second;
             wait(yield());
         } catch (Error& e) {
             if (e.code() == error_code_timed_out || e.code() == error_code_operation_obsolete) {
-                req.reply.sendError(e);
+                replyPromise.sendError(e);
                 return Void();
             } else {
                 throw;
@@ -1642,33 +1656,33 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

     state double blockStart = now();

-    if (req.returnIfBlocked && logData->version.get() < req.begin) {
-        req.reply.sendError(end_of_stream());
-        if (req.sequence.present()) {
+    if (reqReturnIfBlocked && logData->version.get() < reqBegin) {
+        replyPromise.sendError(end_of_stream());
+        if (reqSequence.present()) {
             auto& trackerData = logData->peekTracker[peekId];
             auto& sequenceData = trackerData.sequence_version[sequence + 1];
             trackerData.lastUpdate = now();
             if (!sequenceData.isSet()) {
-                sequenceData.send(std::make_pair(req.begin, req.onlySpilled));
+                sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled));
             }
         }
         return Void();
     }

-    //TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
+    //TraceEvent("TLogPeekMessages0", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
     // Wait until we have something to return that the caller doesn't already have
-    if (logData->version.get() < req.begin) {
-        wait(logData->version.whenAtLeast(req.begin));
+    if (logData->version.get() < reqBegin) {
+        wait(logData->version.whenAtLeast(reqBegin));
         wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
     }

-    if (logData->locality != tagLocalitySatellite && req.tag.locality == tagLocalityLogRouter) {
+    if (logData->locality != tagLocalitySatellite && reqTag.locality == tagLocalityLogRouter) {
         wait(self->concurrentLogRouterReads.take());
         state FlowLock::Releaser globalReleaser(self->concurrentLogRouterReads);
         wait(delay(0.0, TaskPriority::Low));
     }

-    if (req.begin <= logData->persistentDataDurableVersion && req.tag.locality != tagLocalityTxs && req.tag != txsTag) {
+    if (reqBegin <= logData->persistentDataDurableVersion && reqTag.locality != tagLocalityTxs && reqTag != txsTag) {
         // Reading spilled data will almost always imply that the storage server is >5s behind the rest
         // of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
         // slightly faster over keeping the rest of the cluster operating normally.
@@ -1679,8 +1693,8 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

     state double workStart = now();

-    Version poppedVer = poppedVersion(logData, req.tag);
-    if (poppedVer > req.begin) {
+    Version poppedVer = poppedVersion(logData, reqTag);
+    if (poppedVer > reqBegin) {
         TLogPeekReply rep;
         rep.maxKnownVersion = logData->version.get();
         rep.minKnownCommittedVersion = logData->minKnownCommittedVersion;
@@ -1688,12 +1702,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
         rep.end = poppedVer;
         rep.onlySpilled = false;

-        if (req.sequence.present()) {
+        if (reqSequence.present()) {
             auto& trackerData = logData->peekTracker[peekId];
             auto& sequenceData = trackerData.sequence_version[sequence + 1];
             trackerData.lastUpdate = now();
             if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
-                req.reply.sendError(operation_obsolete());
+                replyPromise.sendError(operation_obsolete());
                 if (!sequenceData.isSet())
                     sequenceData.sendError(operation_obsolete());
                 return Void();
@@ -1701,16 +1715,16 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
             if (sequenceData.isSet()) {
                 if (sequenceData.getFuture().get().first != rep.end) {
                     TEST(true); // tlog peek second attempt ended at a different version
-                    req.reply.sendError(operation_obsolete());
+                    replyPromise.sendError(operation_obsolete());
                     return Void();
                 }
             } else {
                 sequenceData.send(std::make_pair(rep.end, rep.onlySpilled));
             }
-            rep.begin = req.begin;
+            rep.begin = reqBegin;
         }

-        req.reply.send(rep);
+        replyPromise.send(rep);
         return Void();
     }

@@ -1718,24 +1732,24 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
     state bool onlySpilled = false;

     // grab messages from disk
-    //TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", req.begin.epoch).detail("ReqBeginSeq", req.begin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", req.tag1).detail("Tag2", req.tag2);
-    if (req.begin <= logData->persistentDataDurableVersion) {
+    //TraceEvent("TLogPeekMessages", self->dbgid).detail("ReqBeginEpoch", reqBegin.epoch).detail("ReqBeginSeq", reqBegin.sequence).detail("Epoch", self->epoch()).detail("PersistentDataSeq", self->persistentDataSequence).detail("Tag1", reqTag1).detail("Tag2", reqTag2);
+    if (reqBegin <= logData->persistentDataDurableVersion) {
         // Just in case the durable version changes while we are waiting for the read, we grab this data from memory. We
         // may or may not actually send it depending on whether we get enough data from disk. SOMEDAY: Only do this if
         // an initial attempt to read from disk results in insufficient data and the required data is no longer in
         // memory SOMEDAY: Should we only send part of the messages we collected, to actually limit the size of the
         // result?

-        if (req.onlySpilled) {
+        if (reqOnlySpilled) {
             endVersion = logData->persistentDataDurableVersion + 1;
         } else {
-            peekMessagesFromMemory(logData, req, messages2, endVersion);
+            peekMessagesFromMemory(logData, reqTag, reqBegin, messages2, endVersion);
         }

-        if (logData->shouldSpillByValue(req.tag)) {
+        if (logData->shouldSpillByValue(reqTag)) {
             RangeResult kvs = wait(self->persistentData->readRange(
-                KeyRangeRef(persistTagMessagesKey(logData->logId, req.tag, req.begin),
-                            persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
+                KeyRangeRef(persistTagMessagesKey(logData->logId, reqTag, reqBegin),
+                            persistTagMessagesKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
                 SERVER_KNOBS->DESIRED_TOTAL_BYTES,
                 SERVER_KNOBS->DESIRED_TOTAL_BYTES));

@@ -1755,11 +1769,11 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
             // FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
             RangeResult kvrefs = wait(self->persistentData->readRange(
                 KeyRangeRef(
-                    persistTagMessageRefsKey(logData->logId, req.tag, req.begin),
-                    persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
+                    persistTagMessageRefsKey(logData->logId, reqTag, reqBegin),
+                    persistTagMessageRefsKey(logData->logId, reqTag, logData->persistentDataDurableVersion + 1)),
                 SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK + 1));

-            //TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());
+            //TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("Tag1Results", s1).detail("Tag2Results", s2).detail("Tag1ResultsLim", kv1.size()).detail("Tag2ResultsLim", kv2.size()).detail("Tag1ResultsLast", kv1.size() ? kv1[0].key : "").detail("Tag2ResultsLast", kv2.size() ? kv2[0].key : "").detail("Limited", limited).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowEpoch", self->epoch()).detail("NowSeq", self->sequence.getNextSequence());

             state std::vector<std::pair<IDiskQueue::location, IDiskQueue::location>> commitLocations;
             state bool earlyEnd = false;
@@ -1776,7 +1790,7 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
                     earlyEnd = true;
                     break;
                 }
-                if (sd.version >= req.begin) {
+                if (sd.version >= reqBegin) {
                     firstVersion = std::min(firstVersion, sd.version);
                     const IDiskQueue::location end = sd.start.lo + sd.length;
                     commitLocations.emplace_back(sd.start, end);
@@ -1818,12 +1832,12 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
                     messages << VERSION_HEADER << entry.version;

                     std::vector<StringRef> rawMessages =
-                        wait(parseMessagesForTag(entry.messages, req.tag, logData->logRouterTags));
+                        wait(parseMessagesForTag(entry.messages, reqTag, logData->logRouterTags));
                     for (const StringRef& msg : rawMessages) {
                         messages.serializeBytes(msg);
                         DEBUG_TAGS_AND_MESSAGE("TLogPeekFromDisk", entry.version, msg, logData->logId)
                             .detail("DebugID", self->dbgid)
-                            .detail("PeekTag", req.tag);
+                            .detail("PeekTag", reqTag);
                     }

                     lastRefMessageVersion = entry.version;
@@ -1841,28 +1855,28 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
             }
         }
     } else {
-        if (req.onlySpilled) {
+        if (reqOnlySpilled) {
             endVersion = logData->persistentDataDurableVersion + 1;
         } else {
-            peekMessagesFromMemory(logData, req, messages, endVersion);
+            peekMessagesFromMemory(logData, reqTag, reqBegin, messages, endVersion);
         }

-        //TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
+        //TraceEvent("TLogPeekResults", self->dbgid).detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress()).detail("MessageBytes", messages.getLength()).detail("NextEpoch", next_pos.epoch).detail("NextSeq", next_pos.sequence).detail("NowSeq", self->sequence.getNextSequence());
     }

     TLogPeekReply reply;
     reply.maxKnownVersion = logData->version.get();
     reply.minKnownCommittedVersion = logData->minKnownCommittedVersion;
-    reply.messages = messages.toValue();
+    reply.messages = StringRef(reply.arena, messages.toValue());
     reply.end = endVersion;
     reply.onlySpilled = onlySpilled;

-    //TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("Tag", req.tag.toString()).
-    // detail("BeginVer", req.begin).detail("EndVer", reply.end).
+    //TraceEvent("TlogPeek", self->dbgid).detail("LogId", logData->logId).detail("Tag", reqTag.toString()).
+    // detail("BeginVer", reqBegin).detail("EndVer", reply.end).
     // detail("MsgBytes", reply.messages.expectedSize()).
-    // detail("ForAddress", req.reply.getEndpoint().getPrimaryAddress());
+    // detail("ForAddress", replyPromise.getEndpoint().getPrimaryAddress());

-    if (req.sequence.present()) {
+    if (reqSequence.present()) {
         auto& trackerData = logData->peekTracker[peekId];
         trackerData.lastUpdate = now();

@@ -1886,9 +1900,9 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen

         auto& sequenceData = trackerData.sequence_version[sequence + 1];
         if (trackerData.sequence_version.size() && sequence + 1 < trackerData.sequence_version.begin()->first) {
-            req.reply.sendError(operation_obsolete());
+            replyPromise.sendError(operation_obsolete());
             if (!sequenceData.isSet()) {
-                // It would technically be more correct to .send({req.begin, req.onlySpilled}), as the next
+                // It would technically be more correct to .send({reqBegin, reqOnlySpilled}), as the next
                 // request might still be in the window of active requests, but LogSystemPeekCursor will
                 // throw away all future responses upon getting an operation_obsolete(), so computing a
                 // response will probably be a waste of CPU.
@@ -1900,19 +1914,59 @@ ACTOR Future<Void> tLogPeekMessages(TLogData* self, TLogPeekRequest req, Referen
             trackerData.duplicatePeeks++;
             if (sequenceData.getFuture().get().first != reply.end) {
                 TEST(true); // tlog peek second attempt ended at a different version (2)
-                req.reply.sendError(operation_obsolete());
+                replyPromise.sendError(operation_obsolete());
                 return Void();
             }
         } else {
             sequenceData.send(std::make_pair(reply.end, reply.onlySpilled));
         }
-        reply.begin = req.begin;
+        reply.begin = reqBegin;
     }

-    req.reply.send(reply);
+    replyPromise.send(reply);
     return Void();
 }

+// This actor keeps pushing TLogPeekStreamReply until it's removed from the cluster or should recover
+ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Reference<LogData> logData) {
+    self->activePeekStreams++;
+
+    state Version begin = req.begin;
+    state bool onlySpilled = false;
+    req.reply.setByteLimit(std::min(SERVER_KNOBS->MAXIMUM_PEEK_BYTES, req.limitBytes));
+    loop {
+        state TLogPeekStreamReply reply;
+        state Promise<TLogPeekReply> promise;
+        state Future<TLogPeekReply> future(promise.getFuture());
+        try {
+            wait(req.reply.onReady() && store(reply.rep, future) &&
+                 tLogPeekMessages(promise, self, logData, begin, req.tag, req.returnIfBlocked, onlySpilled));
+
+            reply.rep.begin = begin;
+            req.reply.send(reply);
+            begin = reply.rep.end;
+            onlySpilled = reply.rep.onlySpilled;
+            if (reply.rep.end > logData->version.get()) {
+                wait(delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()));
+            } else {
+                wait(delay(0, g_network->getCurrentTask()));
+            }
+        } catch (Error& e) {
+            self->activePeekStreams--;
+            TraceEvent(SevDebug, "TLogPeekStreamEnd", logData->logId)
+                .detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress())
+                .error(e, true);
+
+            if (e.code() == error_code_end_of_stream || e.code() == error_code_operation_obsolete) {
+                req.reply.sendError(e);
+                return Void();
+            } else {
+                throw;
+            }
+        }
+    }
+}
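
Note for reviewers: making tLogPeekMessages an ACTOR template over PromiseType is what lets one body serve both paths: serveTLogInterface below passes the network-facing ReplyPromise<TLogPeekReply> from the request, while tLogPeekStream above passes a local Promise<TLogPeekReply> and forwards the result over its stream. Anything exposing the send()/sendError() surface works. A minimal sketch of the idiom with a toy payload, illustration only and not code from this change:

// Sketch: one actor body, two promise flavors.
ACTOR template <typename PromiseType>
Future<Void> produceAnswer(PromiseType replyPromise) {
    wait(delay(0.01));
    replyPromise.send(42); // works for Promise<int> and ReplyPromise<int> alike
    return Void();
}

ACTOR Future<Void> useLocalFlavor() {
    state Promise<int> local;
    wait(produceAnswer(local)); // local path, as in tLogPeekStream
    ASSERT(local.getFuture().get() == 42);
    return Void();
}
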
 ACTOR Future<Void> doQueueCommit(TLogData* self,
                                  Reference<LogData> logData,
                                  std::vector<Reference<LogData>> missingFinalCommit) {
@ -2408,8 +2462,14 @@ ACTOR Future<Void> serveTLogInterface(TLogData* self,
|
||||||
logData->logSystem->set(Reference<ILogSystem>());
|
logData->logSystem->set(Reference<ILogSystem>());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
when(TLogPeekStreamRequest req = waitNext(tli.peekStreamMessages.getFuture())) {
|
||||||
|
TraceEvent(SevDebug, "TLogPeekStream", logData->logId)
|
||||||
|
.detail("Token", tli.peekStreamMessages.getEndpoint().token);
|
||||||
|
logData->addActor.send(tLogPeekStream(self, req, logData));
|
||||||
|
}
|
||||||
when(TLogPeekRequest req = waitNext(tli.peekMessages.getFuture())) {
|
when(TLogPeekRequest req = waitNext(tli.peekMessages.getFuture())) {
|
||||||
logData->addActor.send(tLogPeekMessages(self, req, logData));
|
logData->addActor.send(tLogPeekMessages(
|
||||||
|
req.reply, self, logData, req.begin, req.tag, req.returnIfBlocked, req.onlySpilled, req.sequence));
|
||||||
}
|
}
|
||||||
when(TLogPopRequest req = waitNext(tli.popMessages.getFuture())) {
|
when(TLogPopRequest req = waitNext(tli.popMessages.getFuture())) {
|
||||||
logData->addActor.send(tLogPop(self, req, logData));
|
logData->addActor.send(tLogPop(self, req, logData));
|
||||||
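Note: the signature change in this hunk is what enables the streaming path: tLogPeekMessages now completes a Promise<TLogPeekReply> handed in by its caller, so the one-shot when-clause and the streaming actor share a single implementation. The pattern in miniature, with toy types and names (store() and the future operator&& are the same flow primitives the diff itself uses):

```cpp
// Toy version of the promise-threading used by tLogPeekMessages.
ACTOR Future<Void> produceOne(Promise<int> reply) {
	reply.send(42); // the producer completes whatever promise it was handed
	return Void();
}

ACTOR Future<Void> consumeViaPromise() {
	state Promise<int> promise;
	state Future<int> future = promise.getFuture();
	state int result;
	// Run the producer and capture its reply in one composite wait,
	// mirroring wait(... && store(reply.rep, future) && tLogPeekMessages(promise, ...)).
	wait(store(result, future) && produceOne(promise));
	ASSERT(result == 42);
	return Void();
}
```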
@@ -2664,7 +2724,7 @@ ACTOR Future<Void> tLogCore(TLogData* self,
 	                                      SERVER_KNOBS->STORAGE_LOGGING_DELAY,
 	                                      &logData->cc,
 	                                      logData->logId.toString() + "/TLogMetrics",
-	                                      [self=self](TraceEvent& te) {
+	                                      [self = self](TraceEvent& te) {
 		                                      StorageBytes sbTlog = self->persistentData->getStorageBytes();
 		                                      te.detail("KvstoreBytesUsed", sbTlog.used);
 		                                      te.detail("KvstoreBytesFree", sbTlog.free);
@@ -2848,6 +2908,7 @@ ACTOR Future<Void> restorePersistentState(TLogData* self,
 	recruited.initEndpoints();

 	DUMPTOKEN(recruited.peekMessages);
+	DUMPTOKEN(recruited.peekStreamMessages);
 	DUMPTOKEN(recruited.popMessages);
 	DUMPTOKEN(recruited.commit);
 	DUMPTOKEN(recruited.lock);
@@ -2894,9 +2955,9 @@ ACTOR Future<Void> restorePersistentState(TLogData* self,
 			logsByVersion.emplace_back(ver, id1);

 			TraceEvent("TLogPersistentStateRestore", self->dbgid)
 			    .detail("LogId", logData->logId)
 			    .detail("Ver", ver)
 			    .detail("RecoveryCount", logData->recoveryCount);
 			// Restore popped keys. Pop operations that took place after the last (committed) updatePersistentDataVersion
 			// might be lost, but that is fine because we will get the corresponding data back, too.
 			tagKeys = prefixRange(rawId.withPrefix(persistTagPoppedKeys.begin));
@@ -3109,6 +3170,7 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
 	recruited.initEndpoints();

 	DUMPTOKEN(recruited.peekMessages);
+	DUMPTOKEN(recruited.peekStreamMessages);
 	DUMPTOKEN(recruited.popMessages);
 	DUMPTOKEN(recruited.commit);
 	DUMPTOKEN(recruited.lock);
@@ -114,7 +114,7 @@ struct TesterInterface {

 ACTOR Future<Void> testerServerCore(TesterInterface interf,
                                     Reference<ClusterConnectionFile> ccf,
-                                    Reference<AsyncVar<struct ServerDBInfo>> serverDBInfo,
+                                    Reference<AsyncVar<struct ServerDBInfo> const> serverDBInfo,
                                     LocalityData locality);

 enum test_location_t { TEST_HERE, TEST_ON_SERVERS, TEST_ON_TESTERS };
@@ -880,8 +880,9 @@ class Database openDBOnServer(Reference<AsyncVar<ServerDBInfo> const> const& db,
                               TaskPriority taskID = TaskPriority::DefaultEndpoint,
                               LockAware = LockAware::False,
                               EnableLocalityLoadBalance = EnableLocalityLoadBalance::True);
-ACTOR Future<Void> extractClusterInterface(Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> a,
-                                           Reference<AsyncVar<Optional<struct ClusterInterface>>> b);
+ACTOR Future<Void> extractClusterInterface(
+    Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>> const> in,
+    Reference<AsyncVar<Optional<struct ClusterInterface>>> out);

 ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> ccf,
                         LocalityData localities,
@@ -925,7 +926,7 @@ ACTOR Future<Void> storageServer(
     connFile); // changes pssi->id() to be the recovered ID); // changes pssi->id() to be the recovered ID
 ACTOR Future<Void> masterServer(MasterInterface mi,
                                 Reference<AsyncVar<ServerDBInfo> const> db,
-                                Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
+                                Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,
                                 ServerCoordinators serverCoordinators,
                                 LifetimeToken lifetime,
                                 bool forceRecovery);
@@ -1981,7 +1981,7 @@ ACTOR Future<Void> masterCore(Reference<MasterData> self) {

 ACTOR Future<Void> masterServer(MasterInterface mi,
                                 Reference<AsyncVar<ServerDBInfo> const> db,
-                                Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
+                                Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,
                                 ServerCoordinators coordinators,
                                 LifetimeToken lifetime,
                                 bool forceRecovery) {
@@ -241,10 +241,12 @@ struct UpdateEagerReadInfo {

 	void addMutation(MutationRef const& m) {
 		// SOMEDAY: Theoretically we can avoid a read if there is an earlier overlapping ClearRange
-		if (m.type == MutationRef::ClearRange && !m.param2.startsWith(systemKeys.end))
+		if (m.type == MutationRef::ClearRange && !m.param2.startsWith(systemKeys.end) &&
+		    SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS)
 			keyBegin.push_back(m.param2);
 		else if (m.type == MutationRef::CompareAndClear) {
-			keyBegin.push_back(keyAfter(m.param1, arena));
+			if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS)
+				keyBegin.push_back(keyAfter(m.param1, arena));
 			if (keys.size() > 0 && keys.back().first == m.param1) {
 				// Don't issue a second read, if the last read was equal to the current key.
 				// CompareAndClear is likely to be used after another atomic operation on same key.
@@ -260,8 +262,10 @@ struct UpdateEagerReadInfo {
 	}

 	void finishKeyBegin() {
-		std::sort(keyBegin.begin(), keyBegin.end());
-		keyBegin.resize(std::unique(keyBegin.begin(), keyBegin.end()) - keyBegin.begin());
+		if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
+			std::sort(keyBegin.begin(), keyBegin.end());
+			keyBegin.resize(std::unique(keyBegin.begin(), keyBegin.end()) - keyBegin.begin());
+		}
 		std::sort(keys.begin(), keys.end(), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) {
 			return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second > rhs.second);
 		});
@@ -2384,21 +2388,22 @@ void getQueuingMetrics(StorageServer* self, StorageQueuingMetricsRequest const&
 ACTOR Future<Void> doEagerReads(StorageServer* data, UpdateEagerReadInfo* eager) {
 	eager->finishKeyBegin();

-	vector<Future<Key>> keyEnd(eager->keyBegin.size());
-	for (int i = 0; i < keyEnd.size(); i++)
-		keyEnd[i] = data->storage.readNextKeyInclusive(eager->keyBegin[i]);
+	if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
+		vector<Future<Key>> keyEnd(eager->keyBegin.size());
+		for (int i = 0; i < keyEnd.size(); i++)
+			keyEnd[i] = data->storage.readNextKeyInclusive(eager->keyBegin[i]);

 		state Future<vector<Key>> futureKeyEnds = getAll(keyEnd);
+		state vector<Key> keyEndVal = wait(futureKeyEnds);
+		eager->keyEnd = keyEndVal;
+	}

 	vector<Future<Optional<Value>>> value(eager->keys.size());
 	for (int i = 0; i < value.size(); i++)
 		value[i] = data->storage.readValuePrefix(eager->keys[i].first, eager->keys[i].second);

 	state Future<vector<Optional<Value>>> futureValues = getAll(value);
-	state vector<Key> keyEndVal = wait(futureKeyEnds);
 	vector<Optional<Value>> optionalValues = wait(futureValues);

-	eager->keyEnd = keyEndVal;
 	eager->value = optionalValues;

 	return Void();
@@ -2507,7 +2512,7 @@ bool expandMutation(MutationRef& m,
 		i = d.lastLessOrEqual(m.param2);
 		if (i && i->isClearTo() && i->getEndKey() >= m.param2) {
 			m.param2 = i->getEndKey();
-		} else {
+		} else if (SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS) {
 			// Expand to the next set or clear (from storage or latestVersion), and if it
 			// is a clear, engulf it as well
 			i = d.lower_bound(m.param2);
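Note: the four storageserver hunks above gate one mechanism behind a single knob. When ENABLE_CLEAR_RANGE_EAGER_READS is off, addMutation() queues no boundary keys, finishKeyBegin() has nothing to deduplicate, doEagerReads() skips the readNextKeyInclusive() pass entirely, and expandMutation() no longer engulfs adjacent clears. A condensed sketch of the resulting control flow (abridged and renamed for illustration; not a drop-in for the real code):

```cpp
// Condensed: eager clear-range boundary reads happen only when the knob is on.
void planClearRangeEagerRead(MutationRef const& m, UpdateEagerReadInfo& eager) {
	if (!SERVER_KNOBS->ENABLE_CLEAR_RANGE_EAGER_READS)
		return; // keyBegin stays empty, so doEagerReads() issues no boundary-key reads
	if (m.type == MutationRef::ClearRange && !m.param2.startsWith(systemKeys.end))
		eager.keyBegin.push_back(m.param2);
}
```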
@@ -315,7 +315,7 @@ struct CompoundWorkload : TestWorkload {

 TestWorkload* getWorkloadIface(WorkloadRequest work,
                                VectorRef<KeyValueRef> options,
-                               Reference<AsyncVar<ServerDBInfo>> dbInfo) {
+                               Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
 	Value testName = getOption(options, LiteralStringRef("testName"), LiteralStringRef("no-test-specified"));
 	WorkloadContext wcx;
 	wcx.clientId = work.clientId;
@@ -350,7 +350,7 @@ TestWorkload* getWorkloadIface(WorkloadRequest work,
 	return workload;
 }

-TestWorkload* getWorkloadIface(WorkloadRequest work, Reference<AsyncVar<ServerDBInfo>> dbInfo) {
+TestWorkload* getWorkloadIface(WorkloadRequest work, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
 	if (work.options.size() < 1) {
 		TraceEvent(SevError, "TestCreationError").detail("Reason", "No options provided");
 		fprintf(stderr, "ERROR: No options were provided for workload.\n");
@@ -602,7 +602,7 @@ ACTOR Future<Void> runWorkloadAsync(Database cx,

 ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,
                                         Reference<ClusterConnectionFile> ccf,
-                                        Reference<AsyncVar<struct ServerDBInfo>> dbInfo,
+                                        Reference<AsyncVar<struct ServerDBInfo> const> dbInfo,
                                         LocalityData locality) {
 	state WorkloadInterface workIface;
 	state bool replied = false;
@@ -661,7 +661,7 @@ ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,

 ACTOR Future<Void> testerServerCore(TesterInterface interf,
                                     Reference<ClusterConnectionFile> ccf,
-                                    Reference<AsyncVar<struct ServerDBInfo>> dbInfo,
+                                    Reference<AsyncVar<struct ServerDBInfo> const> dbInfo,
                                     LocalityData locality) {
 	state PromiseStream<Future<Void>> addWorkload;
 	state Future<Void> workerFatalError = actorCollection(addWorkload.getFuture());
@@ -520,7 +520,7 @@ ACTOR Future<Void> registrationClient(Reference<AsyncVar<Optional<ClusterControl
                                       Reference<AsyncVar<bool> const> degraded,
                                       Reference<ClusterConnectionFile> connFile,
                                       Reference<AsyncVar<std::set<std::string>> const> issues,
-                                      LocalConfiguration* localConfig) {
+                                      Reference<LocalConfiguration> localConfig) {
 	// Keeps the cluster controller (as it may be re-elected) informed that this worker exists
 	// The cluster controller uses waitFailureClient to find out if we die, and returns from registrationReply
 	// (requiring us to re-register) The registration request piggybacks optional distributor interface if it exists.
@@ -604,7 +604,7 @@ ACTOR Future<Void> registrationClient(Reference<AsyncVar<Optional<ClusterControl
 }

 // Returns true if `address` is used in the db (indicated by `dbInfo`) transaction system and in the db's primary DC.
-bool addressInDbAndPrimaryDc(const NetworkAddress& address, Reference<AsyncVar<ServerDBInfo>> dbInfo) {
+bool addressInDbAndPrimaryDc(const NetworkAddress& address, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
 	const auto& dbi = dbInfo->get();

 	if (dbi.master.addresses().contains(address)) {
@@ -625,7 +625,6 @@ bool addressInDbAndPrimaryDc(const NetworkAddress& address, Reference<AsyncVar<S
 		}
 	}

-
 	for (const auto& grvProxy : dbi.client.grvProxies) {
 		if (grvProxy.addresses().contains(address)) {
 			return true;
@@ -661,7 +660,7 @@ bool addressInDbAndPrimaryDc(const NetworkAddress& address, Reference<AsyncVar<S
 	return false;
 }

-bool addressesInDbAndPrimaryDc(const NetworkAddressList& addresses, Reference<AsyncVar<ServerDBInfo>> dbInfo) {
+bool addressesInDbAndPrimaryDc(const NetworkAddressList& addresses, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
 	return addressInDbAndPrimaryDc(addresses.address, dbInfo) ||
 	       (addresses.secondaryAddress.present() && addressInDbAndPrimaryDc(addresses.secondaryAddress.get(), dbInfo));
 }
@@ -707,13 +706,15 @@ TEST_CASE("/fdbserver/worker/addressInDbAndPrimaryDc") {
 	// Last, tests that proxies included in the ClientDbInfo are considered as local.
 	NetworkAddress grvProxyAddress(IPAddress(0x26262626), 1);
 	GrvProxyInterface grvProxyInterf;
-	grvProxyInterf.getConsistentReadVersion = RequestStream<struct GetReadVersionRequest>(Endpoint({ grvProxyAddress }, UID(1, 2)));
+	grvProxyInterf.getConsistentReadVersion =
+	    RequestStream<struct GetReadVersionRequest>(Endpoint({ grvProxyAddress }, UID(1, 2)));
 	testDbInfo.client.grvProxies.push_back(grvProxyInterf);
 	ASSERT(addressInDbAndPrimaryDc(grvProxyAddress, makeReference<AsyncVar<ServerDBInfo>>(testDbInfo)));

 	NetworkAddress commitProxyAddress(IPAddress(0x37373737), 1);
 	CommitProxyInterface commitProxyInterf;
-	commitProxyInterf.commit = RequestStream<struct CommitTransactionRequest>(Endpoint({ commitProxyAddress }, UID(1, 2)));
+	commitProxyInterf.commit =
+	    RequestStream<struct CommitTransactionRequest>(Endpoint({ commitProxyAddress }, UID(1, 2)));
 	testDbInfo.client.commitProxies.push_back(commitProxyInterf);
 	ASSERT(addressInDbAndPrimaryDc(commitProxyAddress, makeReference<AsyncVar<ServerDBInfo>>(testDbInfo)));

@@ -723,10 +724,10 @@ TEST_CASE("/fdbserver/worker/addressInDbAndPrimaryDc") {
 } // namespace

 // The actor that actively monitors the health of local and peer servers, and reports anomaly to the cluster controller.
-ACTOR Future<Void> healthMonitor(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
+ACTOR Future<Void> healthMonitor(Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,
                                  WorkerInterface interf,
                                  LocalityData locality,
-                                 Reference<AsyncVar<ServerDBInfo>> dbInfo) {
+                                 Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
 	loop {
 		Future<Void> nextHealthCheckDelay = Never();
 		if (dbInfo->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS &&
@@ -959,7 +960,7 @@ ACTOR Future<Void> storageServerRollbackRebooter(std::set<std::pair<UID, KeyValu
                                                  UID id,
                                                  LocalityData locality,
                                                  bool isTss,
-                                                 Reference<AsyncVar<ServerDBInfo>> db,
+                                                 Reference<AsyncVar<ServerDBInfo> const> db,
                                                  std::string folder,
                                                  ActorCollection* filesClosed,
                                                  int64_t memoryLimit,
@@ -1006,7 +1007,7 @@ ACTOR Future<Void> storageServerRollbackRebooter(std::set<std::pair<UID, KeyValu
 ACTOR Future<Void> storageCacheRollbackRebooter(Future<Void> prevStorageCache,
                                                 UID id,
                                                 LocalityData locality,
-                                                Reference<AsyncVar<ServerDBInfo>> db) {
+                                                Reference<AsyncVar<ServerDBInfo> const> db) {
 	loop {
 		ErrorOr<Void> e = wait(errorOr(prevStorageCache));
 		if (!e.isError()) {
@@ -1212,7 +1213,7 @@ struct SharedLogsValue {
 };

 ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
-                                Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
+                                Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,
                                 LocalityData locality,
                                 Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
                                 ProcessClass initialClass,
@@ -1226,7 +1227,7 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
                                 std::string whitelistBinPaths,
                                 Reference<AsyncVar<ServerDBInfo>> dbInfo,
                                 ConfigDBType configDBType,
-                                LocalConfiguration* localConfig) {
+                                Reference<LocalConfiguration> localConfig) {
 	state PromiseStream<ErrorInfo> errors;
 	state Reference<AsyncVar<Optional<DataDistributorInterface>>> ddInterf(
 	    new AsyncVar<Optional<DataDistributorInterface>>());
@@ -1912,6 +1913,7 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
 				startRole(Role::LOG_ROUTER, recruited.id(), interf.id(), details);

 				DUMPTOKEN(recruited.peekMessages);
+				DUMPTOKEN(recruited.peekStreamMessages);
 				DUMPTOKEN(recruited.popMessages);
 				DUMPTOKEN(recruited.commit);
 				DUMPTOKEN(recruited.lock);
@@ -2044,14 +2046,14 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
 	}
 }

-ACTOR Future<Void> extractClusterInterface(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> a,
-                                           Reference<AsyncVar<Optional<ClusterInterface>>> b) {
+ACTOR Future<Void> extractClusterInterface(Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> in,
+                                           Reference<AsyncVar<Optional<ClusterInterface>>> out) {
 	loop {
-		if (a->get().present())
-			b->set(a->get().get().clientInterface);
+		if (in->get().present())
+			out->set(in->get().get().clientInterface);
 		else
-			b->set(Optional<ClusterInterface>());
-		wait(a->onChange());
+			out->set(Optional<ClusterInterface>());
+		wait(in->onChange());
 	}
 }

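Note: the recurring Reference<AsyncVar<T>> to Reference<AsyncVar<T> const> edits in this file (and in the tester/workload headers earlier) are a const-correctness pass: observers may call get() and onChange() but can no longer call set(). The observer shape this enforces, sketched as a standalone actor (illustrative, not from the diff):

```cpp
// Read-only observers now take AsyncVar<T> const; set() will not compile.
ACTOR template <class T>
Future<Void> watchForever(Reference<AsyncVar<T> const> var) {
	loop {
		T snapshot = var->get(); // reads are allowed
		(void)snapshot;
		// var->set(...) would be a compile error here
		wait(var->onChange()); // wakes when some writer calls set()
	}
}
```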
@@ -2086,7 +2088,7 @@ ACTOR Future<Void> printTimeout() {
 	return Void();
 }

-ACTOR Future<Void> printOnFirstConnected(Reference<AsyncVar<Optional<ClusterInterface>>> ci) {
+ACTOR Future<Void> printOnFirstConnected(Reference<AsyncVar<Optional<ClusterInterface>> const> ci) {
 	state Future<Void> timeoutFuture = printTimeout();
 	loop {
 		choose {
@@ -2372,14 +2374,15 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
                         ConfigDBType configDBType) {
 	state vector<Future<Void>> actors;
 	state Promise<Void> recoveredDiskFiles;
-	state LocalConfiguration localConfig(dataFolder, configPath, manualKnobOverrides);
+	state Reference<LocalConfiguration> localConfig =
+	    makeReference<LocalConfiguration>(dataFolder, configPath, manualKnobOverrides);
 	// setupStackSignal();
 	getCurrentLineage()->modify(&RoleLineage::role) = ProcessClass::Worker;

 	// FIXME: Initializing here causes simulation issues, these must be fixed
 	/*
 	if (configDBType != ConfigDBType::DISABLED) {
-	    wait(localConfig.initialize());
+	    wait(localConfig->initialize());
 	}
 	*/

@@ -2450,7 +2453,7 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
                             whitelistBinPaths,
                             dbInfo,
                             configDBType,
-                            &localConfig),
+                            localConfig),
                         "WorkerServer",
                         UID(),
                         &normalWorkerErrors()));
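Note: switching localConfig from a state object passed by raw pointer to a Reference<LocalConfiguration> means workerServer and registrationClient share ownership rather than borrowing into fdbd's actor frame. The ownership change in miniature (toy Config type standing in for the reference-counted LocalConfiguration):

```cpp
// Callees share the object instead of borrowing a pointer tied to the caller's frame.
struct Config : ReferenceCounted<Config> {
	int value = 0;
};

ACTOR Future<Void> useConfig(Reference<Config> cfg) {
	wait(delay(1.0));
	cfg->value++; // the Reference keeps the object alive even if its creator is gone
	return Void();
}
```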
@@ -84,6 +84,8 @@ struct KillRegionWorkload : TestWorkload {
 		TraceEvent("ForceRecovery_Wait").log();
 		wait(delay(deterministicRandom()->random01() * self->testDuration));

+		// FIXME: killDataCenter breaks simulation if forceKill=false, since some processes can survive and
+		// partially complete a recovery
 		g_simulator.killDataCenter(LiteralStringRef("0"),
 		                           deterministicRandom()->random01() < 0.5 ? ISimulator::KillInstantly
 		                                                                   : ISimulator::RebootAndDelete,
@@ -224,7 +224,7 @@ struct ReadWriteWorkload : KVWorkload {
 	Future<Void> setup(Database const& cx) override { return _setup(cx, this); }
 	Future<Void> start(Database const& cx) override { return _start(cx, this); }

-	ACTOR static Future<bool> traceDumpWorkers(Reference<AsyncVar<ServerDBInfo>> db) {
+	ACTOR static Future<bool> traceDumpWorkers(Reference<AsyncVar<ServerDBInfo> const> db) {
 		try {
 			loop {
 				choose {
@@ -19,7 +19,7 @@
 */

 #include "fdbclient/NativeAPI.actor.h"
-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "fdbserver/Knobs.h"
 #include "fdbserver/TesterInterface.actor.h"
 #include "fdbserver/workloads/workloads.actor.h"
@@ -83,7 +83,7 @@ struct TagThrottleApiWorkload : TestWorkload {
 		tagSet.addTag(tag);

 		try {
-			wait(ThrottleApi::throttleTags(cx,
+			wait(ThrottleApi::throttleTags(cx.getReference(),
 			                               tagSet,
 			                               rate,
 			                               duration,
@@ -137,7 +137,7 @@ struct TagThrottleApiWorkload : TestWorkload {
 			}
 		}

-		bool removed = wait(ThrottleApi::unthrottleTags(cx, tagSet, throttleType, priority));
+		bool removed = wait(ThrottleApi::unthrottleTags(cx.getReference(), tagSet, throttleType, priority));
 		if (removed) {
 			ASSERT(erased || !throttleType.present() || throttleType.get() == TagThrottleType::AUTO);
 		} else {
@@ -151,7 +151,9 @@ struct TagThrottleApiWorkload : TestWorkload {
 	                                       TagThrottleApiWorkload* self,
 	                                       Database cx,
 	                                       std::map<std::pair<TransactionTag, TransactionPriority>, TagThrottleInfo> const* manuallyThrottledTags) {
-		std::vector<TagThrottleInfo> tags = wait(ThrottleApi::getThrottledTags(cx, CLIENT_KNOBS->TOO_MANY));
+		std::vector<TagThrottleInfo> tags =
+		    wait(ThrottleApi::getThrottledTags(cx.getReference(), CLIENT_KNOBS->TOO_MANY));

 		int manualThrottledTags = 0;
 		int activeAutoThrottledTags = 0;
@@ -184,7 +186,8 @@ struct TagThrottleApiWorkload : TestWorkload {
 	}

 	ACTOR Future<Void> getRecommendedTags(TagThrottleApiWorkload* self, Database cx) {
-		std::vector<TagThrottleInfo> tags = wait(ThrottleApi::getRecommendedTags(cx, CLIENT_KNOBS->TOO_MANY));
+		std::vector<TagThrottleInfo> tags =
+		    wait(ThrottleApi::getRecommendedTags(cx.getReference(), CLIENT_KNOBS->TOO_MANY));

 		for (auto& tag : tags) {
 			ASSERT(tag.throttleType == TagThrottleType::AUTO);
@@ -200,7 +203,7 @@ struct TagThrottleApiWorkload : TestWorkload {
 		                                  deterministicRandom()->coinflip() ? Optional<TransactionPriority>()
 		                                                                    : deterministicRandom()->randomChoice(allTransactionPriorities);

-		bool unthrottled = wait(ThrottleApi::unthrottleAll(cx, throttleType, priority));
+		bool unthrottled = wait(ThrottleApi::unthrottleAll(cx.getReference(), throttleType, priority));
 		if (!throttleType.present() || throttleType.get() == TagThrottleType::MANUAL) {
 			bool unthrottleExpected = false;
 			bool empty = manuallyThrottledTags->empty();
@@ -227,15 +230,16 @@ struct TagThrottleApiWorkload : TestWorkload {
 	}

 	ACTOR Future<Void> enableAutoThrottling(TagThrottleApiWorkload* self, Database cx) {
+		state Reference<DatabaseContext> db = cx.getReference();
 		if (deterministicRandom()->coinflip()) {
-			wait(ThrottleApi::enableAuto(cx, true));
+			wait(ThrottleApi::enableAuto(db, true));
 			self->autoThrottleEnabled = true;
 			if (deterministicRandom()->coinflip()) {
 				bool unthrottled =
-				    wait(ThrottleApi::unthrottleAll(cx, TagThrottleType::AUTO, Optional<TransactionPriority>()));
+				    wait(ThrottleApi::unthrottleAll(db, TagThrottleType::AUTO, Optional<TransactionPriority>()));
 			}
 		} else {
-			wait(ThrottleApi::enableAuto(cx, false));
+			wait(ThrottleApi::enableAuto(db, false));
 			self->autoThrottleEnabled = false;
 		}

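Note: the cx to cx.getReference() edits track a ThrottleApi refactor that takes a database handle (here a Reference<DatabaseContext>) instead of a Database value; call sites that invoke it repeatedly now cache the reference in a state variable, as enableAutoThrottling does above. Usage in miniature, assuming the ThrottleApi entry points shown in these hunks (the actor name and trace event are illustrative):

```cpp
// Cache the handle once, then reuse it for every ThrottleApi call.
ACTOR Future<Void> toggleAutoThrottling(Database cx, bool enabled) {
	state Reference<DatabaseContext> db = cx.getReference();
	wait(ThrottleApi::enableAuto(db, enabled));
	std::vector<TagThrottleInfo> tags = wait(ThrottleApi::getThrottledTags(db, CLIENT_KNOBS->TOO_MANY));
	TraceEvent("AutoThrottleToggled").detail("Enabled", enabled).detail("ThrottledTags", tags.size());
	return Void();
}
```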
@@ -23,7 +23,7 @@
 #include "fdbserver/workloads/BulkSetup.actor.h"
 #include "fdbserver/WorkerInterface.actor.h"
 #include "fdbclient/NativeAPI.actor.h"
-#include "fdbclient/TagThrottle.h"
+#include "fdbclient/TagThrottle.actor.h"
 #include "flow/actorcompiler.h" // This must be the last #include.

 constexpr int SAMPLE_SIZE = 10000;
@@ -100,7 +100,7 @@ struct WriteTagThrottlingWorkload : KVWorkload {
 			wait(bulkSetup(cx, self, self->keyCount, Promise<double>()));
 		}
 		if (self->clientId == 0) {
-			wait(ThrottleApi::enableAuto(cx, true));
+			wait(ThrottleApi::enableAuto(cx.getReference(), true));
 		}
 		return Void();
 	}
@@ -306,9 +306,10 @@ struct WriteTagThrottlingWorkload : KVWorkload {
 	}
 	ACTOR static Future<Void> throttledTagUpdater(Database cx, WriteTagThrottlingWorkload* self) {
 		state std::vector<TagThrottleInfo> tags;
+		state Reference<DatabaseContext> db = cx.getReference();
 		loop {
 			wait(delay(1.0));
-			wait(store(tags, ThrottleApi::getThrottledTags(cx, CLIENT_KNOBS->TOO_MANY, true)));
+			wait(store(tags, ThrottleApi::getThrottledTags(db, CLIENT_KNOBS->TOO_MANY, true)));
 			self->recordThrottledTags(tags);
 		};
 	}
@@ -49,7 +49,7 @@ struct WorkloadContext {
 	Standalone<VectorRef<KeyValueRef>> options;
 	int clientId, clientCount;
 	int64_t sharedRandomNumber;
-	Reference<AsyncVar<struct ServerDBInfo>> dbInfo;
+	Reference<AsyncVar<struct ServerDBInfo> const> dbInfo;

 	WorkloadContext();
 	WorkloadContext(const WorkloadContext&);
@@ -627,6 +627,13 @@ Future<T> safeThreadFutureToFuture(ThreadFuture<T> threadFuture) {
 	return threadFuture.get();
 }

+// do nothing, just for template functions' calls
+template <class T>
+Future<T> safeThreadFutureToFuture(Future<T> future) {
+	// do nothing
+	return future;
+}
+
 // Helper actor. Do not use directly!
 namespace internal_thread_helper {

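Note: the identity overload added above lets templated code call safeThreadFutureToFuture() on whatever it holds: a ThreadFuture<T> (multi-threaded client) goes through the conversion path, while a plain Future<T> passes through unchanged. For example, a generic helper can now be written once (illustrative name, assuming both overloads are in scope):

```cpp
// Compiles for both ThreadFuture<T> and Future<T> thanks to the new overload.
template <class FutureLike>
auto normalize(FutureLike f) {
	return safeThreadFutureToFuture(f); // always yields a flow Future<T>
}
```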
@@ -599,4 +599,5 @@ extern TraceBatch g_traceBatch;
 #define DUMPTOKEN(name) \
 	TraceEvent("DumpToken", recruited.id()).detail("Name", #name).detail("Token", name.getEndpoint().token)

+#define DisabledTraceEvent(...) false && TraceEvent()
 #endif
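Note: DisabledTraceEvent relies on short-circuit evaluation: the statement still parses and type-checks, but `false &&` guarantees the right-hand side is never constructed, so a disabled trace line costs nothing at runtime. The trick in miniature, outside of flow (toy Ev type; TraceEvent's own bool conversion is assumed to behave the same way):

```cpp
#include <cassert>

// Toy stand-in for TraceEvent: chainable and contextually convertible to bool.
struct Ev {
	static inline int constructed = 0;
	Ev() { ++constructed; }
	Ev& detail(const char*, int) { return *this; }
	explicit operator bool() const { return true; }
};

#define DISABLED_EV(...) false && Ev()

int main() {
	(void)(DISABLED_EV("Spammy").detail("k", 1)); // parses, but Ev is never constructed
	assert(Ev::constructed == 0);
}
```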
@@ -75,10 +75,10 @@ Performance issues:
 - When waiting for a number of things, wait a little extra time to get
   the stragglers. (See the SmartQuorum() generic actor)

-- If asking another asynch server to do units of work, don't queue up more
+- If asking another asynchronous server to do units of work, don't queue up more
   work than is necessary to keep the server busy. Likewise, if you are
-  busy, let your own work queue fill up to signal your requestor
+  busy, let your own work queue fill up to signal your requester
   that you are blocked. Also do this personally with managers assigning
   you stuff.

-- Pass all variables as "const &" if thier size is greater than 8 bytes.
+- Pass all variables as "const &" if their size is greater than 8 bytes.
@@ -150,13 +150,13 @@
         </Component>

         <Component Id='FDBCRegistryValue' Guid='{6ED940F3-75C8-4385-97D9-D7D0F211B17D}' Win64='yes'>
-            <RegistryKey Root='HKLM' Key='SOFTWARE\$(var.Manufacturer)\KeyValue\Client' Action='createAndRemoveOnUninstall'>
+            <RegistryKey Root='HKLM' Key='SOFTWARE\$(var.Manufacturer)\KeyValue\Client'>
                 <RegistryValue Name='Version' Type='string' Value='$(var.Version)' KeyPath='yes' />
             </RegistryKey>
         </Component>

         <Component Id='FDBSRegistryValue' Guid='{361A9B4A-A06F-4BFB-AFEA-B5F733C8BFDF}' Win64='yes'>
-            <RegistryKey Root='HKLM' Key='SOFTWARE\$(var.Manufacturer)\KeyValue\Server' Action='createAndRemoveOnUninstall'>
+            <RegistryKey Root='HKLM' Key='SOFTWARE\$(var.Manufacturer)\KeyValue\Server'>
                 <RegistryValue Name='Version' Type='string' Value='$(var.Version)' KeyPath='yes' />
             </RegistryKey>
         </Component>
@@ -109,6 +109,7 @@ if(WITH_PYTHON)
   add_fdb_test(TEST_FILES pt.TXT IGNORE)
   add_fdb_test(TEST_FILES randomSelector.txt IGNORE)
   add_fdb_test(TEST_FILES selectorCorrectness.txt IGNORE)
+  add_fdb_test(TEST_FILES IThreadPool.txt IGNORE)
   add_fdb_test(TEST_FILES fast/AtomicBackupCorrectness.toml)
   add_fdb_test(TEST_FILES fast/AtomicBackupToDBCorrectness.toml)
   add_fdb_test(TEST_FILES fast/AtomicOps.toml)