Merge commit 'b17c8359ec22892ed4daeaa569f2f5e105477251' into feature-redwood

# Conflicts:
#	flow/Trace.cpp
This commit is contained in:
Stephen Atherton 2018-06-30 23:18:29 -07:00
commit b95a2bd6c1
81 changed files with 2429 additions and 1449 deletions

View File

@ -136,7 +136,7 @@ static std::pair<std::string, std::string> splitPair(std::string const& input, c
static int abbrevToNID(std::string const& sn) {
int nid = NID_undef;
if (sn == "C" || sn == "CN" || sn == "L" || sn == "ST" || sn == "O" || sn == "OU")
if (sn == "C" || sn == "CN" || sn == "L" || sn == "ST" || sn == "O" || sn == "OU" || sn == "UID" || sn == "DC")
nid = OBJ_sn2nid(sn.c_str());
if (nid == NID_undef)
throw std::runtime_error("abbrevToNID");

View File

@ -154,7 +154,7 @@ clean: $(CLEAN_TARGETS) docpreview_clean
@rm -rf $(DEPSDIR)
@rm -rf lib/
@rm -rf bin/coverage.*.xml
@rm -f */*.g.cpp */*/*/g.cpp */*.g.h */*/*.g.h
@find . -name "*.g.cpp" -exec rm -f {} \; -or -name "*.g.h" -exec rm -f {} \;
targets:
@echo "Available targets:"

View File

@ -28,7 +28,7 @@ from bindingtester import util
from bindingtester.tests import Test, Instruction, InstructionSet, ResultSpecification
from bindingtester.tests import test_util, directory_util
from bindingtester.tests.directory_util import DirListEntry
from bindingtester.tests.directory_state_tree import DirectoryStateTreeNode
fdb.api_version(FDB_API_VERSION)
@ -48,12 +48,12 @@ class DirectoryTest(Test):
def ensure_default_directory_subspace(self, instructions, path):
directory_util.create_default_directory_subspace(instructions, path, self.random)
child = self.root.add_child((path,), path, self.root, DirListEntry(True, True))
child = self.root.add_child(path, DirectoryStateTreeNode(True, True, has_known_prefix=True))
self.dir_list.append(child)
self.dir_index = directory_util.DEFAULT_DIRECTORY_INDEX
def generate_layer(self):
if random.random < 0.7:
if random.random() < 0.7:
return ''
else:
choice = random.randint(0, 3)
@ -114,29 +114,34 @@ class DirectoryTest(Test):
instructions.push_args(layer)
instructions.push_args(*test_util.with_length(path))
instructions.append('DIRECTORY_OPEN')
# print '%d. Selected %s, dir=%s, has_known_prefix=%s, dir_list_len=%d' \
# % (len(instructions), 'DIRECTORY_OPEN', repr(self.dir_index), False, len(self.dir_list))
self.dir_list.append(self.dir_list[0].add_child(path, default_path, self.root, DirListEntry(True, True, has_known_prefix=False)))
self.dir_list.append(self.root.add_child(path, DirectoryStateTreeNode(True, True, has_known_prefix=False)))
# print('%d. Selected %s, dir=%s, dir_id=%s, has_known_prefix=%s, dir_list_len=%d' \
# % (len(instructions), 'DIRECTORY_OPEN', repr(self.dir_index), self.dir_list[-1].dir_id, False, len(self.dir_list)-1))
instructions.setup_complete()
for i in range(args.num_ops):
if random.random() < 0.5:
self.dir_index = random.randrange(0, len(self.dir_list))
while True:
self.dir_index = random.randrange(0, len(self.dir_list))
if not self.dir_list[self.dir_index].state.is_partition or not self.dir_list[self.dir_index].state.deleted:
break
instructions.push_args(self.dir_index)
instructions.append('DIRECTORY_CHANGE')
dir_entry = self.dir_list[self.dir_index]
choices = op_choices[:]
if self.dir_list[self.dir_index].is_directory:
if dir_entry.state.is_directory:
choices += directory
if self.dir_list[self.dir_index].is_subspace:
if dir_entry.state.is_subspace:
choices += subspace
op = random.choice(choices)
dir_entry = self.dir_list[self.dir_index]
# print '%d. Selected %s, dir=%s, has_known_prefix=%s, dir_list_len=%d' \
# % (len(instructions), op, repr(self.dir_index), repr(dir_entry.has_known_prefix), len(self.dir_list))
# print('%d. Selected %s, dir=%d, dir_id=%d, has_known_prefix=%d, dir_list_len=%d' \
# % (len(instructions), op, self.dir_index, dir_entry.dir_id, dir_entry.state.has_known_prefix, len(self.dir_list)))
if op.endswith('_DATABASE') or op.endswith('_SNAPSHOT'):
root_op = op[0:-9]
@ -151,24 +156,26 @@ class DirectoryTest(Test):
elif root_op == 'DIRECTORY_CREATE_SUBSPACE':
path = generate_path()
instructions.push_args(generate_prefix(allow_empty=False, is_partition=True))
instructions.push_args(generate_prefix(require_unique=False, is_partition=True))
instructions.push_args(*test_util.with_length(path))
instructions.append(op)
self.dir_list.append(DirListEntry(False, True))
self.dir_list.append(DirectoryStateTreeNode(False, True, has_known_prefix=True))
elif root_op == 'DIRECTORY_CREATE_LAYER':
indices = []
prefixes = [generate_prefix(require_unique=args.concurrency==1, is_partition=True) for i in range(2)]
for i in range(2):
instructions.push_args(generate_prefix(allow_empty=False, is_partition=True))
instructions.push_args(prefixes[i])
instructions.push_args(*test_util.with_length(generate_path()))
instructions.append('DIRECTORY_CREATE_SUBSPACE')
indices.append(len(self.dir_list))
self.dir_list.append(DirListEntry(False, True))
self.dir_list.append(DirectoryStateTreeNode(False, True, has_known_prefix=True))
instructions.push_args(random.choice([0, 1]))
instructions.push_args(*indices)
instructions.append(op)
self.dir_list.append(DirListEntry(True, False, False))
self.dir_list.append(DirectoryStateTreeNode.get_layer(prefixes[0]))
elif root_op == 'DIRECTORY_CREATE_OR_OPEN':
# Because allocated prefixes are non-deterministic, we cannot have overlapping
@ -183,14 +190,18 @@ class DirectoryTest(Test):
if not op.endswith('_DATABASE') and args.concurrency == 1:
test_util.blocking_commit(instructions)
self.dir_list.append(dir_entry.add_child(path, default_path, self.root, DirListEntry(True, True, False)))
child_entry = dir_entry.get_descendent(path)
if child_entry is None:
child_entry = DirectoryStateTreeNode(True, True)
child_entry.state.has_known_prefix = False
self.dir_list.append(dir_entry.add_child(path, child_entry))
elif root_op == 'DIRECTORY_CREATE':
layer = self.generate_layer()
is_partition = layer == 'partition'
allow_empty_prefix = random.random() < 0.8
prefix = generate_prefix(allow_empty=allow_empty_prefix, is_partition=is_partition)
prefix = generate_prefix(require_unique=is_partition and args.concurrency==1, is_partition=is_partition, min_length=0)
# Because allocated prefixes are non-deterministic, we cannot have overlapping
# transactions that allocate/remove these prefixes in a comparison test
@ -209,40 +220,59 @@ class DirectoryTest(Test):
if not op.endswith('_DATABASE') and args.concurrency == 1: # and allow_empty_prefix:
test_util.blocking_commit(instructions)
self.dir_list.append(dir_entry.add_child(path, default_path, self.root, DirListEntry(True, True, bool(prefix))))
child_entry = dir_entry.get_descendent(path)
if child_entry is None:
child_entry = DirectoryStateTreeNode(True, True, has_known_prefix=bool(prefix))
elif not bool(prefix):
child_entry.state.has_known_prefix = False
if is_partition:
child_entry.state.is_partition = True
self.dir_list.append(dir_entry.add_child(path, child_entry))
elif root_op == 'DIRECTORY_OPEN':
path = generate_path()
instructions.push_args(self.generate_layer())
instructions.push_args(*test_util.with_length(path))
instructions.append(op)
self.dir_list.append(dir_entry.add_child(path, default_path, self.root, DirListEntry(True, True)))
child_entry = dir_entry.get_descendent(path)
if child_entry is None:
self.dir_list.append(DirectoryStateTreeNode(False, False, has_known_prefix=False))
else:
self.dir_list.append(dir_entry.add_child(path, child_entry))
elif root_op == 'DIRECTORY_MOVE':
old_path = generate_path()
new_path = generate_path()
instructions.push_args(*(test_util.with_length(old_path) + test_util.with_length(new_path)))
instructions.append(op)
# This could probably be made to sometimes set has_known_prefix to true
self.dir_list.append(dir_entry.add_child(new_path, default_path, self.root, DirListEntry(True, True, False)))
child_entry = dir_entry.get_descendent(old_path)
if child_entry is None:
self.dir_list.append(DirectoryStateTreeNode(False, False, has_known_prefix=False))
else:
self.dir_list.append(dir_entry.add_child(new_path, child_entry))
# Make sure that the default directory subspace still exists after moving the specified directory
if dir_entry.is_directory and not dir_entry.is_subspace and old_path == (u'',):
if dir_entry.state.is_directory and not dir_entry.state.is_subspace and old_path == (u'',):
self.ensure_default_directory_subspace(instructions, default_path)
elif root_op == 'DIRECTORY_MOVE_TO':
new_path = generate_path()
instructions.push_args(*test_util.with_length(new_path))
instructions.append(op)
self.dir_list.append(dir_entry.root.add_child(new_path, default_path, self.root,
DirListEntry(True, True, dir_entry.has_known_prefix)))
child_entry = dir_entry.get_descendent(())
if child_entry is None:
self.dir_list.append(DirectoryStateTreeNode(False, False, has_known_prefix=False))
else:
self.dir_list.append(dir_entry.add_child(new_path, child_entry))
# Make sure that the default directory subspace still exists after moving the current directory
self.ensure_default_directory_subspace(instructions, default_path)
# FIXME: There is currently a problem with removing partitions. In these generated tests, it's possible
# for a removed partition to resurrect itself and insert keys into the database using its allocated
# prefix. The result is non-deterministic HCA errors.
elif root_op == 'DIRECTORY_REMOVE' or root_op == 'DIRECTORY_REMOVE_IF_EXISTS':
# Because allocated prefixes are non-deterministic, we cannot have overlapping
# transactions that allocate/remove these prefixes in a comparison test
@ -254,12 +284,14 @@ class DirectoryTest(Test):
if count == 1:
path = generate_path()
instructions.push_args(*test_util.with_length(path))
instructions.push_args(count)
instructions.push_args(count)
instructions.append(op)
dir_entry.delete(path)
# Make sure that the default directory subspace still exists after removing the specified directory
if path == () or (dir_entry.is_directory and not dir_entry.is_subspace and path == (u'',)):
if path == () or (dir_entry.state.is_directory and not dir_entry.state.is_subspace and path == (u'',)):
self.ensure_default_directory_subspace(instructions, default_path)
elif root_op == 'DIRECTORY_LIST' or root_op == 'DIRECTORY_EXISTS':
@ -278,7 +310,7 @@ class DirectoryTest(Test):
instructions.append('DIRECTORY_STRIP_PREFIX')
elif root_op == 'DIRECTORY_UNPACK_KEY' or root_op == 'DIRECTORY_CONTAINS':
if not dir_entry.has_known_prefix or random.random() < 0.2 or root_op == 'DIRECTORY_UNPACK_KEY':
if not dir_entry.state.has_known_prefix or random.random() < 0.2 or root_op == 'DIRECTORY_UNPACK_KEY':
t = self.random.random_tuple(5)
instructions.push_args(*test_util.with_length(t))
instructions.append('DIRECTORY_PACK_KEY')
@ -292,7 +324,7 @@ class DirectoryTest(Test):
instructions.push_args(*test_util.with_length(t))
instructions.append(op)
if root_op == 'DIRECTORY_OPEN_SUBSPACE':
self.dir_list.append(DirListEntry(False, True, dir_entry.has_known_prefix))
self.dir_list.append(DirectoryStateTreeNode(False, True, dir_entry.state.has_known_prefix))
else:
test_util.to_front(instructions, 1)
instructions.append('DIRECTORY_STRIP_PREFIX')
@ -308,16 +340,18 @@ class DirectoryTest(Test):
for i, dir_entry in enumerate(self.dir_list):
instructions.push_args(i)
instructions.append('DIRECTORY_CHANGE')
if dir_entry.is_directory:
if dir_entry.state.is_directory:
instructions.push_args(self.directory_log.key())
instructions.append('DIRECTORY_LOG_DIRECTORY')
if dir_entry.has_known_prefix and dir_entry.is_subspace:
# print '%d. Logging subspace: %d' % (i, dir_entry.dir_id)
if dir_entry.state.has_known_prefix and dir_entry.state.is_subspace:
# print('%d. Logging subspace: %d' % (i, dir_entry.dir_id))
instructions.push_args(self.subspace_log.key())
instructions.append('DIRECTORY_LOG_SUBSPACE')
if (i + 1) % 100 == 0:
test_util.blocking_commit(instructions)
test_util.blocking_commit(instructions)
instructions.push_args(self.stack_subspace.key())
instructions.append('LOG_STACK')
@ -365,11 +399,15 @@ def generate_path(min_length=0):
return path
def generate_prefix(allow_empty=True, is_partition=False):
if allow_empty and random.random() < 0.8:
def generate_prefix(require_unique=False, is_partition=False, min_length=1):
fixed_prefix = 'abcdefg'
if not require_unique and min_length == 0 and random.random() < 0.8:
return None
elif is_partition or random.random() < 0.5:
length = random.randint(0 if allow_empty else 1, 5)
elif require_unique or is_partition or min_length > len(fixed_prefix) or random.random() < 0.5:
if require_unique:
min_length = max(min_length, 16)
length = random.randint(min_length, min_length+5)
if length == 0:
return ''
@ -379,6 +417,6 @@ def generate_prefix(allow_empty=True, is_partition=False):
else:
return ''.join(chr(random.randrange(ord('\x02'), ord('\x14'))) for i in range(0, length))
else:
prefix = 'abcdefg'
generated = prefix[0:random.randrange(0 if allow_empty else 1, len(prefix))]
prefix = fixed_prefix
generated = prefix[0:random.randrange(min_length, len(prefix))]
return generated

View File

@ -0,0 +1,259 @@
import sys
class TreeNodeState:
    """Mutable bag of facts about one position in the modelled directory tree.

    A state object can be shared by several nodes: `parents` is the set of nodes that
    currently point at this state, seeded with the node that created it.
    """

    def __init__(self, node, dir_id, is_directory, is_subspace, has_known_prefix, root, is_partition):
        # Identity and hierarchy bookkeeping.
        self.dir_id = dir_id
        self.root = root
        self.parents = set([node])
        self.children = dict()

        # Capabilities of the entry this state describes.
        self.is_directory = is_directory
        self.is_subspace = is_subspace
        self.is_partition = is_partition
        self.has_known_prefix = has_known_prefix

        # A freshly created state always starts out as not deleted.
        self.deleted = False
# Represents an element of the directory hierarchy. As a result of various operations (e.g. moves) that
# may or may not have succeeded, a node can represent multiple possible states.
class DirectoryStateTreeNode:
    """Node in the tester's model of the directory hierarchy.

    Everything known about a node is kept in a TreeNodeState referenced by `self.state`.
    Merging two nodes makes both of them (and every other node that shared either state)
    point at one state object, so a later update through any of them is seen by all.
    """

    # A cache of directory layers. We mustn't have multiple entries for the same layer
    layers = {}

    # Because our operations may be applied to the default directory in the case that
    # the current directory failed to open/create, we compute the result of each operation
    # as if it was performed on the current directory and the default directory.
    default_directory = None

    # Used for debugging
    dir_id = 0

    @classmethod
    def reset(cls):
        """Clear the class-level id counter, layer cache and default directory."""
        cls.dir_id = 0
        cls.layers = {}
        cls.default_directory = None

    @classmethod
    def set_default_directory(cls, default_directory):
        """Register the node that add_child/get_descendent also apply their work to."""
        cls.default_directory = default_directory

    @classmethod
    def get_layer(cls, node_subspace_prefix):
        """Return the cached layer node for `node_subspace_prefix`, creating it on first use.

        A newly created layer node is a directory, is not a subspace and has no known prefix.
        """
        if node_subspace_prefix not in DirectoryStateTreeNode.layers:
            DirectoryStateTreeNode.layers[node_subspace_prefix] = DirectoryStateTreeNode(True, False, has_known_prefix=False)

        return DirectoryStateTreeNode.layers[node_subspace_prefix]

    def __init__(self, is_directory, is_subspace, has_known_prefix=True, root=None, is_partition=False):
        """Create a node with a fresh state; `root` defaults to the node itself."""
        self.state = TreeNodeState(self, DirectoryStateTreeNode.dir_id + 1, is_directory, is_subspace, has_known_prefix,
                                   root or self, is_partition)
        DirectoryStateTreeNode.dir_id += 1

    def __repr__(self):
        return '{DirEntry %d: %d}' % (self.state.dir_id, self.state.has_known_prefix)

    def _get_descendent(self, subpath, default):
        """Walk `subpath` below this node, following `default` in parallel when it is given.

        Returns None when neither tree contains the path. If only the default tree contains
        the next component, the walk continues in the default tree alone. When the walk ends
        on this node while a default counterpart is still being tracked, that counterpart is
        merged into this node before it is returned.
        """
        if not subpath:
            if default is not None:
                self._merge(default)
            return self

        default_child = None
        if default is not None:
            default_child = default.state.children.get(subpath[0])

        self_child = self.state.children.get(subpath[0])

        if self_child is None:
            if default_child is None:
                return None
            else:
                return default_child._get_descendent(subpath[1:], None)

        return self_child._get_descendent(subpath[1:], default_child)

    def get_descendent(self, subpath):
        """Look up `subpath` below this node, also consulting the default directory."""
        return self._get_descendent(subpath, DirectoryStateTreeNode.default_directory)

    def add_child(self, subpath, child):
        """Record `child` at `subpath` below this node and return the resulting node.

        When a default directory is set, the child is first added below it; the node that
        comes out of that step is what then gets added below this node.
        """
        child.state.root = self.state.root
        if DirectoryStateTreeNode.default_directory is not None:
            # print('Adding child %r to default directory at %r' % (child, subpath))
            child = DirectoryStateTreeNode.default_directory._add_child_impl(subpath, child)
            # print('Added %r' % child)

        # print('Adding child %r to directory at %r' % (child, subpath))
        c = self._add_child_impl(subpath, child)

        # print('Added %r' % c)
        return c

    def _add_child_impl(self, subpath, child):
        """Descend along `subpath`, creating missing nodes, and merge `child` into the last one."""
        # print('%d, %d. Adding child %r (recursive): %r' % (self.state.dir_id, child.state.dir_id, child, subpath))
        if len(subpath) == 0:
            # print('%d, %d. Setting child: %d, %d' % (self.state.dir_id, child.state.dir_id, self.state.has_known_prefix, child.state.has_known_prefix))
            self._merge(child)
            return self
        else:
            if not subpath[0] in self.state.children:
                # print('%d, %d. Path %r was absent from %r (%r)' % (self.state.dir_id, child.state.dir_id, subpath[0:1], self, self.state.children))
                subdir = DirectoryStateTreeNode(True, True, root=self.state.root)
                self.state.children[subpath[0]] = subdir
            else:
                subdir = self.state.children[subpath[0]]
                # print('%d, %d. Path was present' % (self.state.dir_id, child.state.dir_id))

            # Nodes that are only passed through on the way to the target lose their known prefix.
            if len(subpath) > 1:
                subdir.state.has_known_prefix = False

            return subdir._add_child_impl(subpath[1:], child)

    def _merge(self, other):
        """Fold `other`'s state into this node's state and make both share it.

        The merged state keeps the smaller dir_id, is a directory / subspace / known-prefix
        only if both inputs were, and is deleted / a partition if either input was. Children
        present on both sides are merged recursively.
        """
        if self.state.dir_id == other.state.dir_id:
            return

        # The debug id lives on the shared state; the node object itself carries no dir_id.
        self.state.dir_id = min(other.state.dir_id, self.state.dir_id)
        self.state.is_directory = self.state.is_directory and other.state.is_directory
        self.state.is_subspace = self.state.is_subspace and other.state.is_subspace
        self.state.has_known_prefix = self.state.has_known_prefix and other.state.has_known_prefix
        self.state.deleted = self.state.deleted or other.state.deleted
        self.state.is_partition = self.state.is_partition or other.state.is_partition

        # Snapshot before repointing: once `other.state` is replaced its old contents are unreachable.
        other_children = other.state.children.copy()
        other_parents = other.state.parents.copy()

        for node in other_parents:
            node.state = self.state
            self.state.parents.add(node)

        for c in other_children:
            if c not in self.state.children:
                self.state.children[c] = other_children[c]
            else:
                self.state.children[c]._merge(other_children[c])

    def _delete_impl(self):
        """Mark this node and everything below it as deleted."""
        # The flag doubles as a visited marker, so already-deleted subtrees are not walked again.
        if not self.state.deleted:
            self.state.deleted = True
            for c in self.state.children.values():
                c._delete_impl()

    def delete(self, path):
        """Mark the node at `path` (and its descendants) as deleted; unknown paths are ignored."""
        child = self.get_descendent(path)
        if child is not None:
            child._delete_impl()
def validate_dir(dir, root):
    """Assert that `dir` references the root expected for its kind.

    A node flagged as a directory must have `root` as its state's root; any other node
    must be its own root. Raises AssertionError otherwise.
    """
    expected_root = root if dir.state.is_directory else dir
    assert dir.state.root == expected_root
def run_test():
    """Self-test for DirectoryStateTreeNode: builds a small tree and asserts on the merged state.

    Returns None; any failed expectation surfaces as an AssertionError.
    """
    # Start from clean class-level state (id counter, layer cache, default directory) so the
    # expectations below hold even if nodes were created earlier in this process.
    DirectoryStateTreeNode.reset()

    all_entries = []

    root = DirectoryStateTreeNode.get_layer('\xfe')
    all_entries.append(root)

    default_dir = root.add_child(('default',), DirectoryStateTreeNode(True, True, has_known_prefix=True))
    DirectoryStateTreeNode.set_default_directory(default_dir)
    all_entries.append(default_dir)

    all_entries.append(default_dir.add_child(('1',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    all_entries.append(default_dir.add_child(('1', '1'), DirectoryStateTreeNode(True, False, has_known_prefix=True)))
    all_entries.append(default_dir.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    all_entries.append(default_dir.add_child(('3',), DirectoryStateTreeNode(True, True, has_known_prefix=False)))
    all_entries.append(default_dir.add_child(('5',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    all_entries.append(default_dir.add_child(('3', '1'), DirectoryStateTreeNode(True, True, has_known_prefix=False)))
    all_entries.append(default_dir.add_child(('1', '3'), DirectoryStateTreeNode(True, True, has_known_prefix=False)))

    entry = all_entries[-1]
    child_entries = []
    child_entries.append(entry.add_child(('1',), DirectoryStateTreeNode(True, False, has_known_prefix=True)))
    child_entries.append(entry.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    child_entries.append(entry.add_child(('3',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    child_entries.append(entry.add_child(('4',), DirectoryStateTreeNode(True, False, has_known_prefix=False)))
    child_entries.append(entry.add_child(('5',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))

    all_entries.append(root.add_child(('1', '2'), DirectoryStateTreeNode(True, True, has_known_prefix=False)))
    all_entries.append(root.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    all_entries.append(root.add_child(('3',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    all_entries.append(root.add_child(('1', '3',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))

    # This directory was merged with the default, but both have readable prefixes
    entry = root.get_descendent(('2',))
    assert entry.state.has_known_prefix

    entry = all_entries[-1]
    all_entries.append(entry.add_child(('1',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
    all_entries.append(entry.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=False)))
    all_entries.append(entry.add_child(('3',), DirectoryStateTreeNode(True, False, has_known_prefix=True)))
    entry_to_move = all_entries[-1]

    all_entries.append(entry.add_child(('5',), DirectoryStateTreeNode(True, False, has_known_prefix=True)))
    child_entries.append(entry.add_child(('6',), DirectoryStateTreeNode(True, True, has_known_prefix=True)))

    all_entries.extend(child_entries)

    # This directory has an unknown prefix
    entry = root.get_descendent(('1', '2'))
    assert not entry.state.has_known_prefix

    # This directory was default created and should have an unknown prefix
    # It will merge with the default directory's child, which is not a subspace
    entry = root.get_descendent(('1',))
    assert not entry.state.has_known_prefix
    assert not entry.state.is_subspace

    # Multiple merges will have made this prefix unreadable
    entry = root.get_descendent(('2',))
    assert not entry.state.has_known_prefix

    # Merge with default directory's child that has an unknown prefix
    entry = root.get_descendent(('3',))
    assert not entry.state.has_known_prefix

    # Merge with default directory's child that has an unknown prefix and merged children
    entry = root.get_descendent(('1', '3'))
    assert set(entry.state.children.keys()) == {'1', '2', '3', '4', '5', '6'}

    # This child entry should be the combination of ['default', '3'], ['default', '1', '3'], and ['1', '3']
    entry = entry.get_descendent(('3',))
    assert not entry.state.has_known_prefix
    assert not entry.state.is_subspace

    # Verify the merge of the children
    assert not child_entries[0].state.has_known_prefix
    assert not child_entries[0].state.is_subspace

    assert not child_entries[1].state.has_known_prefix
    assert child_entries[1].state.is_subspace

    assert not child_entries[2].state.has_known_prefix
    assert not child_entries[2].state.is_subspace

    assert not child_entries[3].state.has_known_prefix
    assert not child_entries[3].state.is_subspace

    assert child_entries[4].state.has_known_prefix
    assert not child_entries[4].state.is_subspace

    assert child_entries[5].state.has_known_prefix
    assert child_entries[5].state.is_subspace

    entry = root.add_child(('3',), entry_to_move)
    all_entries.append(entry)

    # Test moving an entry
    assert not entry.state.has_known_prefix
    assert not entry.state.is_subspace
    # keys() is a view object on Python 3 and never compares equal to a list, so convert first.
    assert list(entry.state.children.keys()) == ['1']

    for e in all_entries:
        validate_dir(e, root)
if __name__ == '__main__':
    # Run the self-test when executed as a script and hand its return value to sys.exit;
    # a failed assertion propagates as an uncaught AssertionError instead.
    exit_status = run_test()
    sys.exit(exit_status)

View File

@ -27,6 +27,7 @@ from bindingtester import FDB_API_VERSION
from bindingtester import util
from bindingtester.tests import test_util
from bindingtester.tests.directory_state_tree import DirectoryStateTreeNode
fdb.api_version(FDB_API_VERSION)
@ -34,82 +35,26 @@ DEFAULT_DIRECTORY_INDEX = 4
DEFAULT_DIRECTORY_PREFIX = 'default'
DIRECTORY_ERROR_STRING = 'DIRECTORY_ERROR'
class DirListEntry:
dir_id = 0 # Used for debugging
def __init__(self, is_directory, is_subspace, has_known_prefix=True, path=(), root=None):
self.root = root or self
self.path = path
self.is_directory = is_directory
self.is_subspace = is_subspace
self.has_known_prefix = has_known_prefix
self.children = {}
self.dir_id = DirListEntry.dir_id + 1
DirListEntry.dir_id += 1
def __repr__(self):
return 'DirEntry %d %r: %d' % (self.dir_id, self.path, self.has_known_prefix)
def add_child(self, subpath, default_path, root, child):
if default_path in root.children:
# print 'Adding child %r to default directory %r at %r' % (child, root.children[DirectoryTest.DEFAULT_DIRECTORY_PATH].path, subpath)
c = root.children[default_path]._add_child_impl(subpath, child)
child.has_known_prefix = c.has_known_prefix and child.has_known_prefix
# print 'Added %r' % c
# print 'Adding child %r to directory %r at %r' % (child, self.path, subpath)
c = self._add_child_impl(subpath, child)
# print 'Added %r' % c
return c
def _add_child_impl(self, subpath, child):
# print '%d, %d. Adding child (recursive): %s %s' % (self.dir_id, child.dir_id, repr(self.path), repr(subpath))
if len(subpath) == 0:
self.has_known_prefix = self.has_known_prefix and child.has_known_prefix
# print '%d, %d. Setting child: %d' % (self.dir_id, child.dir_id, self.has_known_prefix)
self._merge_children(child)
return self
else:
if not subpath[0] in self.children:
# print '%d, %d. Path %s was absent (%s)' % (self.dir_id, child.dir_id, repr(self.path + subpath[0:1]), repr(self.children))
subdir = DirListEntry(True, True, path=self.path + subpath[0:1], root=self.root)
subdir.has_known_prefix = len(subpath) == 1
self.children[subpath[0]] = subdir
else:
subdir = self.children[subpath[0]]
subdir.has_known_prefix = False
# print '%d, %d. Path was present' % (self.dir_id, child.dir_id)
return subdir._add_child_impl(subpath[1:], child)
def _merge_children(self, other):
for c in other.children:
if c not in self.children:
self.children[c] = other.children[c]
else:
self.children[c].has_known_prefix = self.children[c].has_known_prefix and other.children[c].has_known_prefix
self.children[c]._merge_children(other.children[c])
def setup_directories(instructions, default_path, random):
dir_list = [DirListEntry(True, False, True)]
# Clients start with the default directory layer in the directory list
DirectoryStateTreeNode.reset()
dir_list = [DirectoryStateTreeNode.get_layer('\xfe')]
instructions.push_args(0, '\xfe')
instructions.append('DIRECTORY_CREATE_SUBSPACE')
dir_list.append(DirListEntry(False, True))
dir_list.append(DirectoryStateTreeNode(False, True))
instructions.push_args(0, '')
instructions.append('DIRECTORY_CREATE_SUBSPACE')
dir_list.append(DirListEntry(False, True))
dir_list.append(DirectoryStateTreeNode(False, True))
instructions.push_args(1, 2, 1)
instructions.append('DIRECTORY_CREATE_LAYER')
dir_list.append(DirListEntry(True, False, True))
dir_list.append(DirectoryStateTreeNode.get_layer('\xfe'))
create_default_directory_subspace(instructions, default_path, random)
dir_list.append(DirListEntry(True, True, True))
dir_list.append(dir_list[0].add_child((default_path,), DirectoryStateTreeNode(True, True, has_known_prefix=True)))
DirectoryStateTreeNode.set_default_directory(dir_list[-1])
instructions.push_args(DEFAULT_DIRECTORY_INDEX)
instructions.append('DIRECTORY_SET_ERROR_INDEX')

View File

@ -170,7 +170,10 @@ class AsyncDirectoryExtension {
.thenAccept(children -> inst.push(Tuple.fromItems(children).pack()));
}
else if(op == DirectoryOperation.DIRECTORY_EXISTS) {
return inst.popParam()
// In Java, DirectoryLayer.exists can return true without doing any reads.
// Other bindings will always do a read, so we get a read version now to be compatible with that behavior.
return inst.readTcx.readAsync(tr -> tr.getReadVersion())
.thenComposeAsync(v -> inst.popParam())
.thenComposeAsync(count -> DirectoryUtil.popPaths(inst, StackUtils.getInt(count)))
.thenComposeAsync(path -> {
if(path.size() == 0)

View File

@ -317,8 +317,8 @@ public class AsyncStackTester {
if(t != null) {
inst.context.newTransaction(oldTr); // Other bindings allow reuse of non-retryable transactions, so we need to emulate that behavior.
}
else {
inst.setTransaction(oldTr, tr);
else if(!inst.setTransaction(oldTr, tr)) {
tr.close();
}
}).thenApply(v -> null);

View File

@ -92,6 +92,7 @@ abstract class Context implements Runnable, AutoCloseable {
private static synchronized Transaction getTransaction(String trName) {
Transaction tr = transactionMap.get(trName);
assert tr != null : "Null transaction";
addTransactionReference(tr);
return tr;
}
@ -117,7 +118,15 @@ abstract class Context implements Runnable, AutoCloseable {
}
private static synchronized boolean updateTransaction(String trName, Transaction oldTr, Transaction newTr) {
if(transactionMap.replace(trName, oldTr, newTr)) {
boolean added;
if(oldTr == null) {
added = (transactionMap.putIfAbsent(trName, newTr) == null);
}
else {
added = transactionMap.replace(trName, oldTr, newTr);
}
if(added) {
addTransactionReference(newTr);
releaseTransaction(oldTr);
return true;

View File

@ -160,6 +160,11 @@ class DirectoryExtension {
int count = StackUtils.getInt(inst.popParam().get());
List<List<String>> path = DirectoryUtil.popPaths(inst, count).get();
boolean exists;
// In Java, DirectoryLayer.exists can return true without doing any reads.
// Other bindings will always do a read, so we get a read version now to be compatible with that behavior.
inst.readTcx.read(tr -> tr.getReadVersion().join());
if(path.size() == 0)
exists = directory().exists(inst.readTcx).get();
else

View File

@ -72,16 +72,21 @@ class Instruction extends Stack {
readTcx = isDatabase ? context.db : readTr;
}
void setTransaction(Transaction newTr) {
boolean setTransaction(Transaction newTr) {
if(!isDatabase) {
context.updateCurrentTransaction(newTr);
return true;
}
return false;
}
void setTransaction(Transaction oldTr, Transaction newTr) {
boolean setTransaction(Transaction oldTr, Transaction newTr) {
if(!isDatabase) {
context.updateCurrentTransaction(oldTr, newTr);
return context.updateCurrentTransaction(oldTr, newTr);
}
return false;
}
void releaseTransaction() {

View File

@ -284,7 +284,10 @@ public class StackTester {
FDBException err = new FDBException("Fake testing error", filteredError ? 1020 : errorCode);
try {
inst.setTransaction(inst.tr.onError(err).join());
Transaction tr = inst.tr.onError(err).join();
if(!inst.setTransaction(tr)) {
tr.close();
}
}
catch(Throwable t) {
inst.context.newTransaction(); // Other bindings allow reuse of non-retryable transactions, so we need to emulate that behavior.

View File

@ -102,71 +102,71 @@ class DirectoryExtension():
new_dir = self.dir_list[self.dir_index]
clazz = new_dir.__class__.__name__
new_path = repr(new_dir._path) if hasattr(new_dir, '_path') else "<na>"
print('changed directory to %d (%s @%s)' % (self.dir_index, clazz, new_path))
print('changed directory to %d (%s @%r)' % (self.dir_index, clazz, new_path))
elif inst.op == six.u('DIRECTORY_SET_ERROR_INDEX'):
self.error_index = inst.pop()
elif inst.op == six.u('DIRECTORY_CREATE_OR_OPEN'):
path = self.pop_tuples(inst.stack)
layer = inst.pop()
log_op('create_or_open %s: layer=%s' % (repr(directory.get_path() + path), repr(layer)))
log_op('create_or_open %r: layer=%r' % (directory.get_path() + path, layer))
d = directory.create_or_open(inst.tr, path, layer or b'')
self.append_dir(inst, d)
elif inst.op == six.u('DIRECTORY_CREATE'):
path = self.pop_tuples(inst.stack)
layer, prefix = inst.pop(2)
log_op('create %s: layer=%s, prefix=%s' % (repr(directory.get_path() + path), repr(layer), repr(prefix)))
log_op('create %r: layer=%r, prefix=%r' % (directory.get_path() + path, layer, prefix))
self.append_dir(inst, directory.create(inst.tr, path, layer or b'', prefix))
elif inst.op == six.u('DIRECTORY_OPEN'):
path = self.pop_tuples(inst.stack)
layer = inst.pop()
log_op('open %s: layer=%s' % (repr(directory.get_path() + path), repr(layer)))
log_op('open %r: layer=%r' % (directory.get_path() + path, layer))
self.append_dir(inst, directory.open(inst.tr, path, layer or b''))
elif inst.op == six.u('DIRECTORY_MOVE'):
old_path, new_path = self.pop_tuples(inst.stack, 2)
log_op('move %s to %s' % (repr(directory.get_path() + old_path), repr(directory.get_path() + new_path)))
log_op('move %r to %r' % (directory.get_path() + old_path, directory.get_path() + new_path))
self.append_dir(inst, directory.move(inst.tr, old_path, new_path))
elif inst.op == six.u('DIRECTORY_MOVE_TO'):
new_absolute_path = self.pop_tuples(inst.stack)
log_op('move %s to %s' % (repr(directory.get_path()), repr(new_absolute_path)))
log_op('move %r to %r' % (directory.get_path(), new_absolute_path))
self.append_dir(inst, directory.move_to(inst.tr, new_absolute_path))
elif inst.op == six.u('DIRECTORY_REMOVE'):
count = inst.pop()
if count == 0:
log_op('remove %s' % repr(directory.get_path()))
log_op('remove %r' % (directory.get_path(),))
directory.remove(inst.tr)
else:
path = self.pop_tuples(inst.stack)
log_op('remove %s' % repr(directory.get_path() + path))
log_op('remove %r' % (directory.get_path() + path,))
directory.remove(inst.tr, path)
elif inst.op == six.u('DIRECTORY_REMOVE_IF_EXISTS'):
count = inst.pop()
if count == 0:
log_op('remove_if_exists %s' % repr(directory.get_path()))
log_op('remove_if_exists %r' % (directory.get_path(),))
directory.remove_if_exists(inst.tr)
else:
path = self.pop_tuples(inst.stack)
log_op('remove_if_exists %s' % repr(directory.get_path() + path))
log_op('remove_if_exists %r' % (directory.get_path() + path,))
directory.remove_if_exists(inst.tr, path)
elif inst.op == six.u('DIRECTORY_LIST'):
count = inst.pop()
if count == 0:
result = directory.list(inst.tr)
log_op('list %s' % (repr(directory.get_path())))
log_op('list %r' % (directory.get_path(),))
else:
path = self.pop_tuples(inst.stack)
result = directory.list(inst.tr, path)
log_op('list %s' % (repr(directory.get_path() + path)))
log_op('list %r' % (directory.get_path() + path,))
inst.push(fdb.tuple.pack(tuple(result)))
elif inst.op == six.u('DIRECTORY_EXISTS'):
count = inst.pop()
if count == 0:
result = directory.exists(inst.tr)
log_op('exists %s: %d' % (repr(directory.get_path()), result))
log_op('exists %r: %d' % (directory.get_path(), result))
else:
path = self.pop_tuples(inst.stack)
result = directory.exists(inst.tr, path)
log_op('exists %s: %d' % (repr(directory.get_path() + path), result))
log_op('exists %r: %d' % (directory.get_path() + path, result))
if result:
inst.push(1)
@ -177,7 +177,7 @@ class DirectoryExtension():
inst.push(directory.pack(key_tuple))
elif inst.op == six.u('DIRECTORY_UNPACK_KEY'):
key = inst.pop()
log_op('unpack %s in subspace with prefix %s' % (repr(key), repr(directory.rawPrefix)))
log_op('unpack %r in subspace with prefix %r' % (key, directory.rawPrefix))
tup = directory.unpack(key)
for t in tup:
inst.push(t)
@ -215,7 +215,7 @@ class DirectoryExtension():
elif inst.op == six.u('DIRECTORY_STRIP_PREFIX'):
s = inst.pop()
if not s.startswith(directory.key()):
raise Exception('String %s does not start with raw prefix %s' % (s, directory.key()))
raise Exception('String %r does not start with raw prefix %r' % (s, directory.key()))
inst.push(s[len(directory.key()):])
else:

View File

@ -91,7 +91,7 @@ class Stack:
else:
raw[i] = (raw[i][0], val)
except fdb.FDBError as e:
# print('ERROR: %s' % repr(e))
# print('ERROR: %r' % e)
raw[i] = (raw[i][0], fdb.tuple.pack((b'ERROR', str(e.code).encode('ascii'))))
if count is None:
@ -543,7 +543,7 @@ class Tester:
else:
raise Exception("Unknown op %s" % inst.op)
except fdb.FDBError as e:
# print('ERROR: %s' % repr(e))
# print('ERROR: %r' % e)
inst.stack.push(idx, fdb.tuple.pack((b"ERROR", str(e.code).encode('ascii'))))
# print(" to %s" % self.stack)

View File

@ -62,7 +62,10 @@
]
},
"data_version":12341234,
"data_version_lag":12341234,
"data_lag": {
"seconds":5.0,
"versions":12341234
},
"id":"eb84471d68c12d1d26f692a50000003f",
"finished_queries":{
"hz":0.0,
@ -362,6 +365,7 @@
"remote_redundancy_mode":"remote_single",
"remote_log_replicas":3,
"remote_logs":5,
"log_routers":10,
"usable_regions":1,
"storage_replicas":1,
"resolvers":1,

View File

@ -31,7 +31,8 @@ extensions = [
'sphinx.ext.todo',
'sphinx.ext.ifconfig',
'brokenrole',
'relativelink'
'relativelink',
'sphinxcontrib.rubydomain'
]
# Add any paths that contain templates here, relative to this directory.

View File

@ -2,3 +2,4 @@
sphinx==1.5.6
sphinx-bootstrap-theme==0.4.8
pygments-style-solarized
sphinxcontrib-rubydomain==0.1dev-20100804

View File

@ -229,46 +229,45 @@ Use the ``status`` command of ``fdbcli`` to determine if the cluster is up and r
The database is available.
Welcome to the fdbcli. For help, type `help'.
fdb> status
fdb> status
Configuration:
Redundancy mode - triple
Storage engine - ssd-2
Coordinators - 5
Desired Proxies - 5
Desired Logs - 8
Configuration:
Redundancy mode - triple
Storage engine - ssd-2
Coordinators - 5
Desired Proxies - 5
Desired Logs - 8
Cluster:
FoundationDB processes - 272
Machines - 16
Memory availability - 14.5 GB per process on machine with least available
Retransmissions rate - 20 Hz
Fault Tolerance - 2 machines
Server time - 03/19/18 08:51:52
Data:
Replication health - Healthy
Moving data - 0.000 GB
Sum of key-value sizes - 3.298 TB
Disk space used - 15.243 TB
Operating space:
Storage server - 1656.2 GB free on most full server
Log server - 1794.7 GB free on most full server
Workload:
Read rate - 55990 Hz
Write rate - 14946 Hz
Transactions started - 6321 Hz
Transactions committed - 1132 Hz
Conflict rate - 0 Hz
Backup and DR:
Running backups - 1
Running DRs - 1 as primary
Client time: 03/19/18 08:51:51
Cluster:
FoundationDB processes - 272
Machines - 16
Memory availability - 14.5 GB per process on machine with least available
Retransmissions rate - 20 Hz
Fault Tolerance - 2 machines
Server time - 03/19/18 08:51:52
Data:
Replication health - Healthy
Moving data - 0.000 GB
Sum of key-value sizes - 3.298 TB
Disk space used - 15.243 TB
Operating space:
Storage server - 1656.2 GB free on most full server
Log server - 1794.7 GB free on most full server
Workload:
Read rate - 55990 Hz
Write rate - 14946 Hz
Transactions started - 6321 Hz
Transactions committed - 1132 Hz
Conflict rate - 0 Hz
Backup and DR:
Running backups - 1
Running DRs - 1 as primary
Client time: 03/19/18 08:51:51
The summary fields are interpreted as follows:
@ -328,131 +327,132 @@ The ``status`` command can provide detailed statistics about the cluster and the
fdb> status details
Configuration:
Redundancy mode - triple
Storage engine - ssd-2
Coordinators - 5
Configuration:
Redundancy mode - triple
Storage engine - ssd-2
Coordinators - 5
Cluster:
FoundationDB processes - 85
Machines - 5
Memory availability - 7.4 GB per process on machine with least available
Retransmissions rate - 5 Hz
Fault Tolerance - 2 machines
Server time - 03/19/18 08:59:37
Data:
Replication health - Healthy
Moving data - 0.000 GB
Sum of key-value sizes - 87.068 GB
Disk space used - 327.819 GB
Operating space:
Storage server - 888.2 GB free on most full server
Log server - 897.3 GB free on most full server
Workload:
Read rate - 117 Hz
Write rate - 0 Hz
Transactions started - 43 Hz
Transactions committed - 1 Hz
Conflict rate - 0 Hz
Cluster:
FoundationDB processes - 85
Machines - 5
Memory availability - 7.4 GB per process on machine with least available
Retransmissions rate - 5 Hz
Fault Tolerance - 2 machines
Server time - 03/19/18 08:59:37
Process performance details:
10.0.4.1:4500 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.1:4501 ( 1% cpu; 2% machine; 0.010 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4502 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4503 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4504 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4505 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4506 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4507 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4508 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4509 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4510 ( 1% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4511 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4512 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4513 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4514 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.1:4515 ( 12% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.1:4516 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.3 GB / 7.4 GB RAM )
10.0.4.2:4500 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.2:4501 ( 15% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4502 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4503 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4504 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4505 ( 18% cpu; 3% machine; 0.124 Gbps; 18% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4506 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4507 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4508 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4509 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4510 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4511 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4512 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.2:4513 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4514 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.2:4515 ( 11% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.2:4516 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM )
10.0.4.3:4500 ( 14% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 3.0 GB / 7.4 GB RAM )
10.0.4.3:4501 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM )
10.0.4.3:4502 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM )
10.0.4.3:4503 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4504 ( 7% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4505 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4506 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4507 ( 2% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4508 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4509 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4510 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4511 ( 2% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4512 ( 2% cpu; 3% machine; 0.284 Gbps; 3% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4513 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4514 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM )
10.0.4.3:4515 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM )
10.0.4.3:4516 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM )
10.0.4.4:4500 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.4:4501 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4502 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4503 ( 2% cpu; 4% machine; 0.065 Gbps; 16% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4504 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.4:4505 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4506 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4507 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4508 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4509 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4510 ( 24% cpu; 4% machine; 0.065 Gbps; 15% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4511 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM )
10.0.4.4:4512 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.4:4513 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4514 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.4:4515 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.4:4516 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.6 GB / 7.4 GB RAM )
10.0.4.5:4500 ( 6% cpu; 2% machine; 0.076 Gbps; 7% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.5:4501 ( 2% cpu; 2% machine; 0.076 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4502 ( 1% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4503 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4504 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4505 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4506 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4507 ( 2% cpu; 2% machine; 0.076 Gbps; 6% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4508 ( 31% cpu; 2% machine; 0.076 Gbps; 8% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4509 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4510 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4511 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4512 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4513 ( 0% cpu; 2% machine; 0.076 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4514 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.5:4515 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.5:4516 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM )
Data:
Replication health - Healthy
Moving data - 0.000 GB
Sum of key-value sizes - 87.068 GB
Disk space used - 327.819 GB
Coordination servers:
10.0.4.1:4500 (reachable)
10.0.4.2:4500 (reachable)
10.0.4.3:4500 (reachable)
10.0.4.4:4500 (reachable)
10.0.4.5:4500 (reachable)
Client time: 03/19/18 08:59:37
Operating space:
Storage server - 888.2 GB free on most full server
Log server - 897.3 GB free on most full server
Workload:
Read rate - 117 Hz
Write rate - 0 Hz
Transactions started - 43 Hz
Transactions committed - 1 Hz
Conflict rate - 0 Hz
Process performance details:
10.0.4.1:4500 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.1:4501 ( 1% cpu; 2% machine; 0.010 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4502 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4503 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4504 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4505 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4506 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4507 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4508 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4509 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4510 ( 1% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4511 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4512 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.1:4513 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.1:4514 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.1:4515 ( 12% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.1:4516 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.3 GB / 7.4 GB RAM )
10.0.4.2:4500 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.2:4501 ( 15% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4502 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4503 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4504 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4505 ( 18% cpu; 3% machine; 0.124 Gbps; 18% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4506 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4507 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4508 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4509 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4510 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4511 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4512 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.2:4513 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.2:4514 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.2:4515 ( 11% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.2:4516 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM )
10.0.4.3:4500 ( 14% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 3.0 GB / 7.4 GB RAM )
10.0.4.3:4501 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM )
10.0.4.3:4502 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM )
10.0.4.3:4503 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4504 ( 7% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4505 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4506 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4507 ( 2% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4508 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4509 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4510 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4511 ( 2% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4512 ( 2% cpu; 3% machine; 0.284 Gbps; 3% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.3:4513 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.3:4514 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM )
10.0.4.3:4515 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM )
10.0.4.3:4516 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM )
10.0.4.4:4500 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.4:4501 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4502 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4503 ( 2% cpu; 4% machine; 0.065 Gbps; 16% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4504 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.4:4505 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4506 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4507 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4508 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4509 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4510 ( 24% cpu; 4% machine; 0.065 Gbps; 15% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4511 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM )
10.0.4.4:4512 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.4:4513 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.4:4514 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.4:4515 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.4:4516 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.6 GB / 7.4 GB RAM )
10.0.4.5:4500 ( 6% cpu; 2% machine; 0.076 Gbps; 7% disk IO; 3.2 GB / 7.4 GB RAM )
10.0.4.5:4501 ( 2% cpu; 2% machine; 0.076 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4502 ( 1% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4503 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4504 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4505 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4506 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4507 ( 2% cpu; 2% machine; 0.076 Gbps; 6% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4508 ( 31% cpu; 2% machine; 0.076 Gbps; 8% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4509 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4510 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM )
10.0.4.5:4511 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4512 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4513 ( 0% cpu; 2% machine; 0.076 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM )
10.0.4.5:4514 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.5:4515 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM )
10.0.4.5:4516 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM )
Coordination servers:
10.0.4.1:4500 (reachable)
10.0.4.2:4500 (reachable)
10.0.4.3:4500 (reachable)
10.0.4.4:4500 (reachable)
10.0.4.5:4500 (reachable)
Client time: 03/19/18 08:59:37
Several details about individual FoundationDB processes are displayed in a list format in parentheses after the IP address and port:
======= =========================================================================

View File

@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-5.2.4.pkg <https://www.foundationdb.org/downloads/5.2.4/macOS/installers/FoundationDB-5.2.4.pkg>`_
* `FoundationDB-5.2.5.pkg <https://www.foundationdb.org/downloads/5.2.5/macOS/installers/FoundationDB-5.2.5.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-5.2.4-1_amd64.deb <https://www.foundationdb.org/downloads/5.2.4/ubuntu/installers/foundationdb-clients_5.2.4-1_amd64.deb>`_
* `foundationdb-server-5.2.4-1_amd64.deb <https://www.foundationdb.org/downloads/5.2.4/ubuntu/installers/foundationdb-server_5.2.4-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-5.2.5-1_amd64.deb <https://www.foundationdb.org/downloads/5.2.5/ubuntu/installers/foundationdb-clients_5.2.5-1_amd64.deb>`_
* `foundationdb-server-5.2.5-1_amd64.deb <https://www.foundationdb.org/downloads/5.2.5/ubuntu/installers/foundationdb-server_5.2.5-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-5.2.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.4/rhel6/installers/foundationdb-clients-5.2.4-1.el6.x86_64.rpm>`_
* `foundationdb-server-5.2.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.4/rhel6/installers/foundationdb-server-5.2.4-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-5.2.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.5/rhel6/installers/foundationdb-clients-5.2.5-1.el6.x86_64.rpm>`_
* `foundationdb-server-5.2.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.5/rhel6/installers/foundationdb-server-5.2.5-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-5.2.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.4/rhel7/installers/foundationdb-clients-5.2.4-1.el7.x86_64.rpm>`_
* `foundationdb-server-5.2.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.4/rhel7/installers/foundationdb-server-5.2.4-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-5.2.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.5/rhel7/installers/foundationdb-clients-5.2.5-1.el7.x86_64.rpm>`_
* `foundationdb-server-5.2.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/5.2.5/rhel7/installers/foundationdb-server-5.2.5-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-5.2.4-x64.msi <https://www.foundationdb.org/downloads/5.2.4/windows/installers/foundationdb-5.2.4-x64.msi>`_
* `foundationdb-5.2.5-x64.msi <https://www.foundationdb.org/downloads/5.2.5/windows/installers/foundationdb-5.2.5-x64.msi>`_
API Language Bindings
=====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
* `foundationdb-5.2.4.tar.gz <https://www.foundationdb.org/downloads/5.2.4/bindings/python/foundationdb-5.2.4.tar.gz>`_
* `foundationdb-5.2.5.tar.gz <https://www.foundationdb.org/downloads/5.2.5/bindings/python/foundationdb-5.2.5.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-5.2.4.gem <https://www.foundationdb.org/downloads/5.2.4/bindings/ruby/fdb-5.2.4.gem>`_
* `fdb-5.2.5.gem <https://www.foundationdb.org/downloads/5.2.5/bindings/ruby/fdb-5.2.5.gem>`_
Java 8+
-------
* `fdb-java-5.2.4.jar <https://www.foundationdb.org/downloads/5.2.4/bindings/java/fdb-java-5.2.4.jar>`_
* `fdb-java-5.2.4-javadoc.jar <https://www.foundationdb.org/downloads/5.2.4/bindings/java/fdb-java-5.2.4-javadoc.jar>`_
* `fdb-java-5.2.5.jar <https://www.foundationdb.org/downloads/5.2.5/bindings/java/fdb-java-5.2.5.jar>`_
* `fdb-java-5.2.5-javadoc.jar <https://www.foundationdb.org/downloads/5.2.5/bindings/java/fdb-java-5.2.5-javadoc.jar>`_
Go 1.1+
-------

View File

@ -81,19 +81,18 @@ The following is a simple implementation of the basic pattern:
// Remove the top element from the queue.
public static Object dequeue(TransactionContext tcx){
final KeyValue item = firstItem(tcx);
if(item == null){
return null;
}
// Remove from the top of the queue.
tcx.run((Transaction tr) -> {
return tcx.run((Transaction tr) -> {
final KeyValue item = firstItem(tr);
if(item == null){
return null;
}
tr.clear(item.getKey());
return null;
// Return the old value.
return Tuple.fromBytes(item.getValue()).get(0);
});
// Return the old value.
return Tuple.fromBytes(item.getValue()).get(0);
}
// Add an element to the queue.

View File

@ -101,7 +101,7 @@ The default behavior when the certificate or key file is not specified is to loo
Default Peer Verification
^^^^^^^^^^^^^^^^^^^^^^^^^
The default peer verification is ``Check.Valid=0``.
The default peer verification is ``Check.Valid=1``.
Default Password
^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -884,6 +884,12 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
if (statusObjConfig.get("logs", intVal))
outputString += format("\n Desired Logs - %d", intVal);
if (statusObjConfig.get("remote_logs", intVal))
outputString += format("\n Desired Remote Logs - %d", intVal);
if (statusObjConfig.get("log_routers", intVal))
outputString += format("\n Desired Log Routers - %d", intVal);
}
catch (std::runtime_error& e) {
outputString = outputStringCache;

View File

@ -29,7 +29,7 @@ DatabaseConfiguration::DatabaseConfiguration()
void DatabaseConfiguration::resetInternal() {
// does NOT reset rawConfiguration
initialized = false;
masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = -1;
masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = desiredLogRouterCount = -1;
tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END;
autoMasterProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES;
autoResolverCount = CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS;
@ -102,11 +102,19 @@ void parse( std::vector<RegionInfo>* regions, ValueRef const& v ) {
info.satelliteTLogUsableDcs = 2;
info.satelliteTLogWriteAntiQuorum = 0;
info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())))));
info.satelliteTLogReplicationFactorFallback = 2;
info.satelliteTLogUsableDcsFallback = 1;
info.satelliteTLogWriteAntiQuorumFallback = 0;
info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())));
} else if(satelliteReplication == "two_satellite_fast") {
info.satelliteTLogReplicationFactor = 4;
info.satelliteTLogUsableDcs = 2;
info.satelliteTLogWriteAntiQuorum = 2;
info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())))));
info.satelliteTLogReplicationFactorFallback = 2;
info.satelliteTLogUsableDcsFallback = 1;
info.satelliteTLogWriteAntiQuorumFallback = 0;
info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())));
} else {
throw invalid_option();
}
@ -114,6 +122,9 @@ void parse( std::vector<RegionInfo>* regions, ValueRef const& v ) {
dc.tryGet("satellite_log_replicas", info.satelliteTLogReplicationFactor);
dc.tryGet("satellite_usable_dcs", info.satelliteTLogUsableDcs);
dc.tryGet("satellite_anti_quorum", info.satelliteTLogWriteAntiQuorum);
dc.tryGet("satellite_log_replicas_fallback", info.satelliteTLogReplicationFactorFallback);
dc.tryGet("satellite_usable_dcs_fallback", info.satelliteTLogUsableDcsFallback);
dc.tryGet("satellite_anti_quorum_fallback", info.satelliteTLogWriteAntiQuorumFallback);
regions->push_back(info);
}
std::sort(regions->begin(), regions->end(), RegionInfo::sort_by_priority() );
@ -137,6 +148,9 @@ void DatabaseConfiguration::setDefaultReplicationPolicy() {
if(r.satelliteTLogReplicationFactor > 0 && !r.satelliteTLogPolicy) {
r.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(r.satelliteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne())));
}
if(r.satelliteTLogReplicationFactorFallback > 0 && !r.satelliteTLogPolicyFallback) {
r.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(r.satelliteTLogReplicationFactorFallback, "zoneid", IRepPolicyRef(new PolicyOne())));
}
}
}
@ -172,7 +186,8 @@ bool DatabaseConfiguration::isValid() const {
r.satelliteTLogReplicationFactor >= 0 &&
r.satelliteTLogWriteAntiQuorum >= 0 &&
r.satelliteTLogUsableDcs >= 1 &&
( r.satelliteTLogReplicationFactor == 0 || ( r.satelliteTLogPolicy && r.satellites.size() ) ) ) ) {
( r.satelliteTLogReplicationFactor == 0 || ( r.satelliteTLogPolicy && r.satellites.size() ) ) &&
( r.satelliteTLogUsableDcsFallback == 0 || ( r.satelliteTLogReplicationFactor > 0 && r.satelliteTLogReplicationFactorFallback > 0 ) ) ) ) {
return false;
}
dcIds.insert(r.dcId);
@ -201,6 +216,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
result["redundancy_mode"] = "double";
} else if( tLogReplicationFactor == 4 && storageTeamSize == 6 && tlogInfo == "dcid^2 x zoneid^2 x 1" && storageInfo == "dcid^3 x zoneid^2 x 1" ) {
result["redundancy_mode"] = "three_datacenter";
} else if( tLogReplicationFactor == 4 && storageTeamSize == 4 && tlogInfo == "dcid^2 x zoneid^2 x 1" && storageInfo == "dcid^2 x zoneid^2 x 1" ) {
result["redundancy_mode"] = "three_datacenter_fallback";
} else if( tLogReplicationFactor == 3 && storageTeamSize == 3 ) {
result["redundancy_mode"] = "triple";
} else if( tLogReplicationFactor == 4 && storageTeamSize == 3 && tlogInfo == "data_hall^2 x zoneid^2 x 1" && storageInfo == "data_hall^3 x 1" ) {
@ -254,21 +271,25 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
dcObj["priority"] = r.priority;
dcArr.push_back(dcObj);
if(r.satelliteTLogReplicationFactor == 1 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) {
if(r.satelliteTLogReplicationFactor == 1 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 0) {
regionObj["satellite_redundancy_mode"] = "one_satellite_single";
} else if(r.satelliteTLogReplicationFactor == 2 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) {
} else if(r.satelliteTLogReplicationFactor == 2 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 0) {
regionObj["satellite_redundancy_mode"] = "one_satellite_double";
} else if(r.satelliteTLogReplicationFactor == 3 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) {
} else if(r.satelliteTLogReplicationFactor == 3 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 0) {
regionObj["satellite_redundancy_mode"] = "one_satellite_triple";
} else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 0) {
} else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 1 && r.satelliteTLogReplicationFactorFallback == 2 && r.satelliteTLogWriteAntiQuorumFallback == 0) {
regionObj["satellite_redundancy_mode"] = "two_satellite_safe";
} else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 2) {
} else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 2 && r.satelliteTLogUsableDcsFallback == 1 && r.satelliteTLogReplicationFactorFallback == 2 && r.satelliteTLogWriteAntiQuorumFallback == 0) {
regionObj["satellite_redundancy_mode"] = "two_satellite_fast";
} else if(r.satelliteTLogReplicationFactor != 0) {
regionObj["satellite_log_replicas"] = r.satelliteTLogReplicationFactor;
regionObj["satellite_usable_dcs"] = r.satelliteTLogUsableDcs;
regionObj["satellite_anti_quorum"] = r.satelliteTLogWriteAntiQuorum;
if(r.satelliteTLogPolicy) regionObj["satellite_log_policy"] = r.satelliteTLogPolicy->info();
regionObj["satellite_log_replicas_fallback"] = r.satelliteTLogReplicationFactorFallback;
regionObj["satellite_usable_dcs_fallback"] = r.satelliteTLogUsableDcsFallback;
regionObj["satellite_anti_quorum_fallback"] = r.satelliteTLogWriteAntiQuorumFallback;
if(r.satelliteTLogPolicyFallback) regionObj["satellite_log_policy_fallback"] = r.satelliteTLogPolicyFallback->info();
}
if( r.satelliteDesiredTLogCount != -1 ) {
@ -301,6 +322,9 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
if( resolverCount != -1 ) {
result["resolvers"] = resolverCount;
}
if( desiredLogRouterCount != -1 ) {
result["log_routers"] = desiredLogRouterCount;
}
if( remoteDesiredTLogCount != -1 ) {
result["remote_logs"] = remoteDesiredTLogCount;
}
@ -344,6 +368,7 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
else if (ck == LiteralStringRef("auto_logs")) parse(&autoDesiredTLogCount, value);
else if (ck == LiteralStringRef("storage_replication_policy")) parseReplicationPolicy(&storagePolicy, value);
else if (ck == LiteralStringRef("log_replication_policy")) parseReplicationPolicy(&tLogPolicy, value);
else if (ck == LiteralStringRef("log_routers")) parse(&desiredLogRouterCount, value);
else if (ck == LiteralStringRef("remote_logs")) parse(&remoteDesiredTLogCount, value);
else if (ck == LiteralStringRef("remote_log_replicas")) parse(&remoteTLogReplicationFactor, value);
else if (ck == LiteralStringRef("remote_log_policy")) parseReplicationPolicy(&remoteTLogPolicy, value);

View File

@ -55,9 +55,15 @@ struct RegionInfo {
int32_t satelliteTLogWriteAntiQuorum;
int32_t satelliteTLogUsableDcs;
IRepPolicyRef satelliteTLogPolicyFallback;
int32_t satelliteTLogReplicationFactorFallback;
int32_t satelliteTLogWriteAntiQuorumFallback;
int32_t satelliteTLogUsableDcsFallback;
std::vector<SatelliteInfo> satellites;
RegionInfo() : priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0), satelliteTLogUsableDcs(1) {}
RegionInfo() : priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0), satelliteTLogUsableDcs(1),
satelliteTLogReplicationFactorFallback(0), satelliteTLogWriteAntiQuorumFallback(0), satelliteTLogUsableDcsFallback(0) {}
struct sort_by_priority {
bool operator ()(RegionInfo const&a, RegionInfo const& b) const { return a.priority > b.priority; }
@ -65,7 +71,8 @@ struct RegionInfo {
template <class Ar>
void serialize(Ar& ar) {
ar & dcId & priority & satelliteTLogPolicy & satelliteDesiredTLogCount & satelliteTLogReplicationFactor & satelliteTLogWriteAntiQuorum & satelliteTLogUsableDcs & satellites;
ar & dcId & priority & satelliteTLogPolicy & satelliteDesiredTLogCount & satelliteTLogReplicationFactor & satelliteTLogWriteAntiQuorum & satelliteTLogUsableDcs &
satelliteTLogPolicyFallback & satelliteTLogReplicationFactorFallback & satelliteTLogWriteAntiQuorumFallback & satelliteTLogUsableDcsFallback & satellites;
}
};
@ -115,12 +122,12 @@ struct DatabaseConfiguration {
}
return minRequired;
}
int32_t minMachinesRequiredPerDatacenter() const {
int32_t minMachinesRequiredPerDatacenter() const {
int minRequired = std::max( remoteTLogReplicationFactor, std::max(tLogReplicationFactor, storageTeamSize) );
for(auto& r : regions) {
minRequired = std::max( minRequired, r.satelliteTLogReplicationFactor/std::max(1, r.satelliteTLogUsableDcs) );
}
return minRequired;
return minRequired;
}
//Killing an entire datacenter counts as killing one machine in modes that support it
@ -128,6 +135,9 @@ struct DatabaseConfiguration {
int worstSatellite = regions.size() ? std::numeric_limits<int>::max() : 0;
for(auto& r : regions) {
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
if(r.satelliteTLogUsableDcsFallback > 0) {
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
}
}
if(usableRegions > 1 && worstSatellite > 0) {
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1);
@ -159,6 +169,7 @@ struct DatabaseConfiguration {
KeyValueStoreType storageServerStoreType;
// Remote TLogs
int32_t desiredLogRouterCount;
int32_t remoteDesiredTLogCount;
int32_t remoteTLogReplicationFactor;
IRepPolicyRef remoteTLogPolicy;

View File

@ -604,7 +604,7 @@ static bool addressExcluded( std::set<AddressExclusion> const& exclusions, Netwo
struct ClusterControllerPriorityInfo {
enum DCFitness { FitnessPrimary, FitnessRemote, FitnessPreferred, FitnessUnknown, FitnessBad }; //cannot be larger than 7 because of leader election mask
static DCFitness calculateDCFitness(Optional<Key> dcId, vector<Optional<Key>> dcPriority) {
static DCFitness calculateDCFitness(Optional<Key> const& dcId, vector<Optional<Key>> const& dcPriority) {
if(!dcPriority.size()) {
return FitnessUnknown;
} else if(dcPriority.size() == 1) {

View File

@ -35,8 +35,8 @@ ClientKnobs::ClientKnobs(bool randomize) {
init( SYSTEM_MONITOR_INTERVAL, 5.0 );
init( FAILURE_MAX_DELAY, 10.0 ); if( randomize && BUGGIFY ) FAILURE_MAX_DELAY = 5.0;
init( FAILURE_MIN_DELAY, 5.0 ); if( randomize && BUGGIFY ) FAILURE_MIN_DELAY = 2.0;
init( FAILURE_MAX_DELAY, 5.0 );
init( FAILURE_MIN_DELAY, 4.0 ); if( randomize && BUGGIFY ) FAILURE_MIN_DELAY = 1.0;
init( FAILURE_TIMEOUT_DELAY, FAILURE_MIN_DELAY );
init( CLIENT_FAILURE_TIMEOUT_DELAY, FAILURE_MIN_DELAY );
@ -168,6 +168,11 @@ ClientKnobs::ClientKnobs(bool randomize) {
init( BLOBSTORE_MAX_SEND_BYTES_PER_SECOND, 1e9 );
init( BLOBSTORE_MAX_RECV_BYTES_PER_SECOND, 1e9 );
init( BLOBSTORE_LIST_REQUESTS_PER_SECOND, 25 );
init( BLOBSTORE_WRITE_REQUESTS_PER_SECOND, 50 );
init( BLOBSTORE_READ_REQUESTS_PER_SECOND, 100 );
init( BLOBSTORE_DELETE_REQUESTS_PER_SECOND, 200 );
// Client Status Info
init(CSI_SAMPLING_PROBABILITY, -1.0);
init(CSI_SIZE_LIMIT, std::numeric_limits<int64_t>::max());

View File

@ -155,6 +155,10 @@ public:
int BLOBSTORE_REQUEST_TRIES;
int BLOBSTORE_REQUEST_TIMEOUT;
int BLOBSTORE_REQUESTS_PER_SECOND;
int BLOBSTORE_LIST_REQUESTS_PER_SECOND;
int BLOBSTORE_WRITE_REQUESTS_PER_SECOND;
int BLOBSTORE_READ_REQUESTS_PER_SECOND;
int BLOBSTORE_DELETE_REQUESTS_PER_SECOND;
int BLOBSTORE_CONCURRENT_REQUESTS;
int BLOBSTORE_MULTIPART_MAX_PART_SIZE;
int BLOBSTORE_MULTIPART_MIN_PART_SIZE;

View File

@ -65,14 +65,14 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
std::string key = mode.substr(0, pos);
std::string value = mode.substr(pos+1);
if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "satellite_logs" || key == "usable_regions") && isInteger(value) ) {
if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "log_routers" || key == "satellite_logs" || key == "usable_regions") && isInteger(value) ) {
out[p+key] = value;
}
if( key == "regions" ) {
json_spirit::mValue mv;
json_spirit::read_string( value, mv );
StatusObject regionObj;
regionObj["regions"] = mv;
out[p+key] = BinaryWriter::toValue(regionObj, IncludeVersion()).toString();
@ -125,6 +125,10 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid",
IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())))
));
} else if(mode == "three_datacenter_fallback") {
redundancy="4";
log_replicas="4";
storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())))));
} else if(mode == "three_data_hall") {
redundancy="3";
log_replicas="4";
@ -340,6 +344,9 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
} else if( result.old_replication == "three_datacenter" ) {
storage_replication = 6;
log_replication = 4;
} else if( result.old_replication == "three_datacenter_fallback" ) {
storage_replication = 4;
log_replication = 4;
} else
return ConfigureAutoResult();

View File

@ -40,6 +40,7 @@
#include "flow/Knobs.h"
#include "fdbclient/Knobs.h"
#include "fdbrpc/Net2FileSystem.h"
#include "fdbrpc/simulator.h"
#include <iterator>
@ -3013,7 +3014,7 @@ void Transaction::checkDeferredError() { cx->checkDeferredError(); }
Reference<TransactionLogInfo> Transaction::createTrLogInfoProbabilistically(const Database &cx) {
double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : cx->clientInfo->get().clientTxnInfoSampleRate;
if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && g_random->random01() < clientSamplingProbability)
if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && g_random->random01() < clientSamplingProbability && (!g_network->isSimulated() || !g_simulator.speedUpSimulation))
return Reference<TransactionLogInfo>(new TransactionLogInfo());
else
return Reference<TransactionLogInfo>();

View File

@ -57,6 +57,10 @@ BlobStoreEndpoint::BlobKnobs::BlobKnobs() {
request_timeout = CLIENT_KNOBS->BLOBSTORE_REQUEST_TIMEOUT;
requests_per_second = CLIENT_KNOBS->BLOBSTORE_REQUESTS_PER_SECOND;
concurrent_requests = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_REQUESTS;
list_requests_per_second = CLIENT_KNOBS->BLOBSTORE_LIST_REQUESTS_PER_SECOND;
write_requests_per_second = CLIENT_KNOBS->BLOBSTORE_WRITE_REQUESTS_PER_SECOND;
read_requests_per_second = CLIENT_KNOBS->BLOBSTORE_READ_REQUESTS_PER_SECOND;
delete_requests_per_second = CLIENT_KNOBS->BLOBSTORE_DELETE_REQUESTS_PER_SECOND;
multipart_max_part_size = CLIENT_KNOBS->BLOBSTORE_MULTIPART_MAX_PART_SIZE;
multipart_min_part_size = CLIENT_KNOBS->BLOBSTORE_MULTIPART_MIN_PART_SIZE;
concurrent_uploads = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_UPLOADS;
@ -79,6 +83,10 @@ bool BlobStoreEndpoint::BlobKnobs::set(StringRef name, int value) {
TRY_PARAM(request_tries, rt);
TRY_PARAM(request_timeout, rto);
TRY_PARAM(requests_per_second, rps);
TRY_PARAM(list_requests_per_second, lrps);
TRY_PARAM(write_requests_per_second, wrps);
TRY_PARAM(read_requests_per_second, rrps);
TRY_PARAM(delete_requests_per_second, drps);
TRY_PARAM(concurrent_requests, cr);
TRY_PARAM(multipart_max_part_size, maxps);
TRY_PARAM(multipart_min_part_size, minps);
@ -107,6 +115,10 @@ std::string BlobStoreEndpoint::BlobKnobs::getURLParameters() const {
_CHECK_PARAM(request_tries, rt);
_CHECK_PARAM(request_timeout, rto);
_CHECK_PARAM(requests_per_second, rps);
_CHECK_PARAM(list_requests_per_second, lrps);
_CHECK_PARAM(write_requests_per_second, wrps);
_CHECK_PARAM(read_requests_per_second, rrps);
_CHECK_PARAM(delete_requests_per_second, drps);
_CHECK_PARAM(concurrent_requests, cr);
_CHECK_PARAM(multipart_max_part_size, maxps);
_CHECK_PARAM(multipart_min_part_size, minps);
@ -195,6 +207,8 @@ std::string BlobStoreEndpoint::getResourceURL(std::string resource) {
}
ACTOR Future<bool> objectExists_impl(Reference<BlobStoreEndpoint> b, std::string bucket, std::string object) {
Void _ = wait(b->requestRateRead->getAllowance(1));
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
@ -207,6 +221,8 @@ Future<bool> BlobStoreEndpoint::objectExists(std::string const &bucket, std::str
}
ACTOR Future<Void> deleteObject_impl(Reference<BlobStoreEndpoint> b, std::string bucket, std::string object) {
Void _ = wait(b->requestRateDelete->getAllowance(1));
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(b->doRequest("DELETE", resource, headers, NULL, 0, {200, 204, 404}));
@ -273,9 +289,10 @@ Future<Void> BlobStoreEndpoint::deleteRecursively(std::string const &bucket, std
}
// Actor that issues a PUT for "/<bucket>" against the blob store endpoint `b`.
// Before building the request it waits on the endpoint's write-request rate limiter for
// an allowance of 1.
// The status list {200, 409} is handed to doRequest; presumably these are the response
// codes treated as success (409 likely meaning the bucket already exists) - confirm
// against doRequest.
// The response itself is not inspected; the actor simply completes with Void.
ACTOR Future<Void> createBucket_impl(Reference<BlobStoreEndpoint> b, std::string bucket) {
// Throttle: one unit of write-request allowance per bucket creation.
Void _ = wait(b->requestRateWrite->getAllowance(1));
std::string resource = std::string("/") + bucket;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(b->doRequest("PUT", resource, headers, NULL, 0, {200, 409}));
return Void();
}
@ -285,6 +302,8 @@ Future<Void> BlobStoreEndpoint::createBucket(std::string const &bucket) {
}
ACTOR Future<int64_t> objectSize_impl(Reference<BlobStoreEndpoint> b, std::string bucket, std::string object) {
Void _ = wait(b->requestRateRead->getAllowance(1));
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
@ -789,6 +808,8 @@ void BlobStoreEndpoint::setAuthHeaders(std::string const &verb, std::string cons
}
ACTOR Future<std::string> readEntireFile_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object) {
Void _ = wait(bstore->requestRateRead->getAllowance(1));
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 404}));
@ -805,6 +826,7 @@ ACTOR Future<Void> writeEntireFileFromBuffer_impl(Reference<BlobStoreEndpoint> b
if(contentLen > bstore->knobs.multipart_max_part_size)
throw file_too_large();
Void _ = wait(bstore->requestRateWrite->getAllowance(1));
Void _ = wait(bstore->concurrentUploads.take());
state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1);
@ -856,6 +878,8 @@ Future<Void> BlobStoreEndpoint::writeEntireFileFromBuffer(std::string const &buc
ACTOR Future<int> readObject_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object, void *data, int length, int64_t offset) {
if(length <= 0)
return 0;
Void _ = wait(bstore->requestRateRead->getAllowance(1));
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1);
@ -874,6 +898,8 @@ Future<int> BlobStoreEndpoint::readObject(std::string const &bucket, std::string
}
ACTOR static Future<std::string> beginMultiPartUpload_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object) {
Void _ = wait(bstore->requestRateWrite->getAllowance(1));
std::string resource = std::string("/") + bucket + "/" + object + "?uploads";
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(bstore->doRequest("POST", resource, headers, NULL, 0, {200}));
@ -892,6 +918,7 @@ Future<std::string> BlobStoreEndpoint::beginMultiPartUpload(std::string const &b
}
ACTOR Future<std::string> uploadPart_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object, std::string uploadID, unsigned int partNumber, UnsentPacketQueue *pContent, int contentLen, std::string contentMD5) {
Void _ = wait(bstore->requestRateWrite->getAllowance(1));
Void _ = wait(bstore->concurrentUploads.take());
state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1);
@ -921,6 +948,7 @@ Future<std::string> BlobStoreEndpoint::uploadPart(std::string const &bucket, std
ACTOR Future<Void> finishMultiPartUpload_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object, std::string uploadID, BlobStoreEndpoint::MultiPartSetT parts) {
state UnsentPacketQueue part_list(); // NonCopyable state var so must be declared at top of actor
Void _ = wait(bstore->requestRateWrite->getAllowance(1));
std::string manifest = "<CompleteMultipartUpload>";
for(auto &p : parts)

View File

@ -55,6 +55,10 @@ public:
request_tries,
request_timeout,
requests_per_second,
list_requests_per_second,
write_requests_per_second,
read_requests_per_second,
delete_requests_per_second,
multipart_max_part_size,
multipart_min_part_size,
concurrent_requests,
@ -78,6 +82,10 @@ public:
"request_tries (or rt) Number of times to try each request until a parseable HTTP response other than 429 is received.",
"request_timeout (or rto) Number of seconds to wait for a request to succeed after a connection is established.",
"requests_per_second (or rps) Max number of requests to start per second.",
"list_requests_per_second (or lrps) Max number of list requests to start per second.",
"write_requests_per_second (or wrps) Max number of write requests to start per second.",
"read_requests_per_second (or rrps) Max number of read requests to start per second.",
"delete_requests_per_second (or drps) Max number of delete requests to start per second.",
"multipart_max_part_size (or maxps) Max part size for multipart uploads.",
"multipart_min_part_size (or minps) Min part size for multipart uploads.",
"concurrent_requests (or cr) Max number of total requests in progress at once, regardless of operation-specific concurrency limits.",
@ -97,6 +105,10 @@ public:
BlobStoreEndpoint(std::string const &host, std::string service, std::string const &key, std::string const &secret, BlobKnobs const &knobs = BlobKnobs())
: host(host), service(service), key(key), secret(secret), lookupSecret(secret.empty()), knobs(knobs),
requestRate(new SpeedLimit(knobs.requests_per_second, 1)),
requestRateList(new SpeedLimit(knobs.list_requests_per_second, 1)),
requestRateWrite(new SpeedLimit(knobs.write_requests_per_second, 1)),
requestRateRead(new SpeedLimit(knobs.read_requests_per_second, 1)),
requestRateDelete(new SpeedLimit(knobs.delete_requests_per_second, 1)),
sendRate(new SpeedLimit(knobs.max_send_bytes_per_second, 1)),
recvRate(new SpeedLimit(knobs.max_recv_bytes_per_second, 1)),
concurrentRequests(knobs.concurrent_requests),
@ -135,6 +147,10 @@ public:
// Speed and concurrency limits
Reference<IRateControl> requestRate;
Reference<IRateControl> requestRateList;
Reference<IRateControl> requestRateWrite;
Reference<IRateControl> requestRateRead;
Reference<IRateControl> requestRateDelete;
Reference<IRateControl> sendRate;
Reference<IRateControl> recvRate;
FlowLock concurrentRequests;

View File

@ -137,6 +137,8 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons
return ProcessClass::OkayFit;
case ProcessClass::ProxyClass:
return ProcessClass::OkayFit;
case ProcessClass::LogRouterClass:
return ProcessClass::OkayFit;
case ProcessClass::UnsetClass:
return ProcessClass::UnsetFit;
case ProcessClass::TesterClass:

View File

@ -27,7 +27,7 @@
struct ProcessClass {
// This enum is stored in restartInfo.ini for upgrade tests, so be very careful about changing the existing items!
enum ClassType { UnsetClass, StorageClass, TransactionClass, ResolutionClass, TesterClass, ProxyClass, MasterClass, StatelessClass, LogClass, ClusterControllerClass, LogRouterClass, InvalidClass = -1 };
enum Fitness { BestFit, GoodFit, OkayFit, UnsetFit, WorstFit, ExcludeFit, NeverAssign }; //cannot be larger than 7 because of leader election mask
enum Fitness { BestFit, GoodFit, UnsetFit, OkayFit, WorstFit, ExcludeFit, NeverAssign }; //cannot be larger than 7 because of leader election mask
enum ClusterRole { Storage, TLog, Proxy, Master, Resolver, LogRouter, ClusterController };
enum ClassSource { CommandLineSource, AutoSource, DBSource, InvalidSource = -1 };
int16_t _class;

View File

@ -91,7 +91,7 @@ TLSConnection::TLSConnection( Reference<IConnection> const& conn, Reference<ITLS
// If session is NULL, we're trusting policy->create_session
// to have used its provided logging function to have logged
// the error
throw internal_error();
throw tls_error();
}
handshook = handshake(this);
}
@ -325,7 +325,7 @@ Reference<ITLSPolicy> TLSOptions::get_policy(PolicyType type) {
if (platform::getEnvironmentVar("FDB_TLS_VERIFY_PEERS", verifyPeerString))
set_verify_peers({ verifyPeerString });
else
set_verify_peers({ std::string("Check.Valid=0")});
set_verify_peers({ std::string("Check.Valid=1")});
}
if (!ca_set) {
std::string caFile;

View File

@ -339,6 +339,43 @@ public:
return results;
}
// Recruits satellite TLog workers for `region`.
//
// The satellites in region.satellites are scanned with a sliding window: starting at index
// startDC, a window of "usable dcs" consecutive satellites is collected and passed to
// getWorkersForTlogs as the only datacenters to recruit from. If that call throws
// no_more_servers, the window is moved one satellite further and the attempt is repeated;
// any other error is rethrown unchanged.
//
// satelliteFallback is an in/out flag. While it is false the primary satellite settings
// (satelliteTLogReplicationFactor / satelliteTLogUsableDcs / satelliteTLogPolicy) are used.
// Once every window has failed, and a fallback is configured (satelliteTLogUsableDcsFallback != 0),
// the flag is set to true and the scan restarts from the first satellite using the *Fallback
// settings. The switch to fallback is refused with operation_failed while less than
// WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY has elapsed since startTime.
//
// Throws no_more_servers when all windows have failed and either the fallback settings were
// already in use or no fallback is configured.
//
// id_used and checkStable are forwarded to getWorkersForTlogs unchanged.
//
//FIXME: This logic will fall back unnecessarily when usable dcs > 1 because it does not check all combinations of potential satellite locations
std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForSatelliteLogs( const DatabaseConfiguration& conf, const RegionInfo& region, std::map< Optional<Standalone<StringRef>>, int>& id_used, bool& satelliteFallback, bool checkStable = false ) {
int startDC = 0;
loop {
// The startDC > 0 guard means the first window (startDC == 0) is always attempted.
// NOTE(review): satellites.size() is unsigned, so this bound is computed in unsigned arithmetic - confirm this is intended when usable dcs exceeds satellites.size() + 1.
if(startDC > 0 && startDC >= region.satellites.size() + 1 - (satelliteFallback ? region.satelliteTLogUsableDcsFallback : region.satelliteTLogUsableDcs)) {
if(satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) {
// Nothing left to try: already using the fallback settings, or none are configured.
throw no_more_servers();
} else {
// Do not drop to the fallback settings until the recruitment delay has elapsed.
if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) {
throw operation_failed();
}
satelliteFallback = true;
startDC = 0;
}
}
try {
// Collect the dcIds of the satellites in the current window, clamped to the end of the list.
std::set<Optional<Key>> satelliteDCs;
for(int s = startDC; s < std::min<int>(startDC + (satelliteFallback ? region.satelliteTLogUsableDcsFallback : region.satelliteTLogUsableDcs), region.satellites.size()); s++) {
satelliteDCs.insert(region.satellites[s].dcId);
}
if(satelliteFallback) {
// The desired log count is scaled by the ratio of fallback usable dcs to normal usable dcs (integer arithmetic).
return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactorFallback, conf.getDesiredSatelliteLogs(region.dcId)*region.satelliteTLogUsableDcsFallback/region.satelliteTLogUsableDcs, region.satelliteTLogPolicyFallback, id_used, checkStable, satelliteDCs );
} else {
return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactor, conf.getDesiredSatelliteLogs(region.dcId), region.satelliteTLogPolicy, id_used, checkStable, satelliteDCs );
}
} catch (Error &e) {
// Only no_more_servers is swallowed so that the next window can be tried.
if(e.code() != error_code_no_more_servers) {
throw;
}
}
startDC++;
}
}
WorkerFitnessInfo getWorkerForRoleInDatacenter(Optional<Standalone<StringRef>> const& dcId, ProcessClass::ClusterRole role, ProcessClass::Fitness unacceptableFitness, DatabaseConfiguration const& conf, std::map< Optional<Standalone<StringRef>>, int>& id_used, bool checkStable = false ) {
std::map<std::pair<ProcessClass::Fitness,int>, vector<std::pair<WorkerInterface, ProcessClass>>> fitness_workers;
@ -439,7 +476,14 @@ public:
return false;
}
// True when this fitness has a strictly larger count than r; otherwise true only when
// this worstFit compares strictly less than r's worstFit.
bool betterCount (RoleFitness const& r) const {
	return count > r.count || worstFit < r.worstFit;
}
// Two fitness values are equal only when worstFit, bestFit and count all match.
bool operator == (RoleFitness const& r) const {
	if(worstFit != r.worstFit) return false;
	if(bestFit != r.bestFit) return false;
	return count == r.count;
}
// Renders this fitness as "<bestFit> <worstFit> <count>", each as a decimal integer.
// The third conversion was previously written as "&d", which emitted a literal "&d"
// and never printed count.
std::string toString() const { return format("%d %d %d", bestFit, worstFit, count); }
};
std::set<Optional<Standalone<StringRef>>> getDatacenters( DatabaseConfiguration const& conf, bool checkStable = false ) {
@ -459,7 +503,6 @@ public:
std::set<Optional<Key>> remoteDC;
remoteDC.insert(req.dcId);
auto remoteLogs = getWorkersForTlogs( req.configuration, req.configuration.getRemoteTLogReplicationFactor(), req.configuration.getDesiredRemoteLogs(), req.configuration.getRemoteTLogPolicy(), id_used, false, remoteDC );
for(int i = 0; i < remoteLogs.size(); i++) {
@ -471,9 +514,13 @@ public:
result.logRouters.push_back(logRouters[i].first);
}
if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
( ( RoleFitness(remoteLogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) ) ||
( RoleFitness(logRouters, ProcessClass::LogRouter) > RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount) ) ) ) {
if(!remoteStartTime.present()) {
remoteStartTime = now();
}
if( now() - remoteStartTime.get() < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY &&
( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs()).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) ||
( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) {
throw operation_failed();
}
@ -487,11 +534,11 @@ public:
id_used[clusterControllerProcessId]++;
ASSERT(dcId.present());
std::set<Optional<Key>> primaryDC;
primaryDC.insert(dcId);
result.dcId = dcId;
RegionInfo region;
for(auto& r : req.configuration.regions) {
if(r.dcId == dcId.get()) {
@ -499,14 +546,14 @@ public:
break;
}
}
if(req.recruitSeedServers) {
auto primaryStorageServers = getWorkersForSeedServers( req.configuration, req.configuration.storagePolicy, dcId );
for(int i = 0; i < primaryStorageServers.size(); i++) {
result.storageServers.push_back(primaryStorageServers[i].first);
}
}
auto tlogs = getWorkersForTlogs( req.configuration, req.configuration.tLogReplicationFactor, req.configuration.getDesiredLogs(), req.configuration.tLogPolicy, id_used, false, primaryDC );
for(int i = 0; i < tlogs.size(); i++) {
result.tLogs.push_back(tlogs[i].first);
@ -514,31 +561,9 @@ public:
std::vector<std::pair<WorkerInterface, ProcessClass>> satelliteLogs;
if(region.satelliteTLogReplicationFactor > 0) {
int startDC = 0;
loop {
if(startDC > 0 && startDC >= region.satellites.size() + 1 - region.satelliteTLogUsableDcs) {
throw no_more_servers();
}
try {
std::set<Optional<Key>> satelliteDCs;
for(int s = startDC; s < std::min<int>(startDC + region.satelliteTLogUsableDcs, region.satellites.size()); s++) {
satelliteDCs.insert(region.satellites[s].dcId);
}
satelliteLogs = getWorkersForTlogs( req.configuration, region.satelliteTLogReplicationFactor, req.configuration.getDesiredSatelliteLogs(dcId), region.satelliteTLogPolicy, id_used, false, satelliteDCs );
for(int i = 0; i < satelliteLogs.size(); i++) {
result.satelliteTLogs.push_back(satelliteLogs[i].first);
}
break;
} catch (Error &e) {
if(e.code() != error_code_no_more_servers) {
throw;
}
}
startDC++;
satelliteLogs = getWorkersForSatelliteLogs( req.configuration, region, id_used, result.satelliteFallback );
for(int i = 0; i < satelliteLogs.size(); i++) {
result.satelliteTLogs.push_back(satelliteLogs[i].first);
}
}
@ -562,10 +587,10 @@ public:
}
if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
( RoleFitness(tlogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) ||
( region.satelliteTLogReplicationFactor > 0 && RoleFitness(satelliteLogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId)) ) ||
RoleFitness(proxies, ProcessClass::Proxy) > RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies()) ||
RoleFitness(resolvers, ProcessClass::Resolver) > RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers()) ) ) {
( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
( region.satelliteTLogReplicationFactor > 0 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId)).betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog)) ) ||
RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies()).betterCount(RoleFitness(proxies, ProcessClass::Proxy)) ||
RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers()).betterCount(RoleFitness(resolvers, ProcessClass::Resolver)) ) ) {
return operation_failed();
}
@ -593,7 +618,11 @@ public:
}
throw no_more_servers();
} catch( Error& e ) {
if (e.code() != error_code_no_more_servers || regions[1].priority < 0 || now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) {
if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && (!clusterControllerDcId.present() || regions[1].dcId != clusterControllerDcId.get())) {
throw operation_failed();
}
if (e.code() != error_code_no_more_servers || regions[1].priority < 0) {
throw;
}
TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", id).error(e);
@ -703,8 +732,8 @@ public:
.detail("DesiredResolvers", req.configuration.getDesiredResolvers()).detail("ActualResolvers", result.resolvers.size());
if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
( RoleFitness(tlogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) ||
bestFitness > RoleFitness(std::min(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), std::max(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), req.configuration.getDesiredProxies()+req.configuration.getDesiredResolvers()) ) ) {
( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
RoleFitness(std::min(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), std::max(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), req.configuration.getDesiredProxies()+req.configuration.getDesiredResolvers()).betterCount(bestFitness) ) ) {
throw operation_failed();
}
@ -716,22 +745,19 @@ public:
if(desiredDcIds.get().present() && desiredDcIds.get().get().size() == 2 && desiredDcIds.get().get()[0].get() == regions[0].dcId && desiredDcIds.get().get()[1].get() == regions[1].dcId) {
return;
}
try {
std::map< Optional<Standalone<StringRef>>, int> id_used;
getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::ClusterController, ProcessClass::ExcludeFit, db.config, id_used, true);
getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::Master, ProcessClass::ExcludeFit, db.config, id_used, true);
std::set<Optional<Key>> primaryDC;
primaryDC.insert(regions[0].dcId);
getWorkersForTlogs(db.config, db.config.tLogReplicationFactor, db.config.desiredTLogCount, db.config.tLogPolicy, id_used, true, primaryDC);
if(regions[0].satelliteTLogReplicationFactor > 0) {
std::set<Optional<Key>> satelliteDCs;
for(auto &s : regions[0].satellites) {
satelliteDCs.insert(s.dcId);
}
getWorkersForTlogs(db.config, regions[0].satelliteTLogReplicationFactor, db.config.getDesiredSatelliteLogs(regions[0].dcId), regions[0].satelliteTLogPolicy, id_used, true, satelliteDCs);
bool satelliteFallback = false;
getWorkersForSatelliteLogs(db.config, regions[0], id_used, satelliteFallback, true);
}
getWorkerForRoleInDatacenter( regions[0].dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true );
@ -856,7 +882,6 @@ public:
return true;
std::set<Optional<Key>> primaryDC;
std::set<Optional<Key>> satelliteDCs;
std::set<Optional<Key>> remoteDC;
RegionInfo region;
@ -871,12 +896,6 @@ public:
region = r;
}
}
if(region.satelliteTLogReplicationFactor > 0) {
for(auto &s : region.satellites) {
satelliteDCs.insert(s.dcId);
}
}
}
// Check tLog fitness
@ -885,23 +904,38 @@ public:
if(oldTLogFit < newTLogFit) return false;
RoleFitness oldSatelliteTLogFit(satellite_tlogs, ProcessClass::TLog);
RoleFitness newSatelliteTLogFit(region.satelliteTLogReplicationFactor > 0 ? getWorkersForTlogs(db.config, region.satelliteTLogReplicationFactor, db.config.getDesiredSatelliteLogs(clusterControllerDcId), region.satelliteTLogPolicy, id_used, true, satelliteDCs) : satellite_tlogs, ProcessClass::TLog);
bool oldSatelliteFallback = false;
for(auto& logSet : dbi.logSystemConfig.tLogs) {
if(logSet.isLocal && logSet.locality == tagLocalitySatellite) {
oldSatelliteFallback = logSet.tLogPolicy->info() != region.satelliteTLogPolicy->info();
ASSERT(!oldSatelliteFallback || logSet.tLogPolicy->info() == region.satelliteTLogPolicyFallback->info());
break;
}
}
if(oldSatelliteTLogFit < newSatelliteTLogFit) return false;
RoleFitness oldSatelliteTLogFit(satellite_tlogs, ProcessClass::TLog);
bool newSatelliteFallback = false;
RoleFitness newSatelliteTLogFit(region.satelliteTLogReplicationFactor > 0 ? getWorkersForSatelliteLogs(db.config, region, id_used, newSatelliteFallback, true) : satellite_tlogs, ProcessClass::TLog);
if(oldSatelliteTLogFit < newSatelliteTLogFit)
return false;
if(!oldSatelliteFallback && newSatelliteFallback)
return false;
RoleFitness oldRemoteTLogFit(remote_tlogs, ProcessClass::TLog);
RoleFitness newRemoteTLogFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForTlogs(db.config, db.config.getRemoteTLogReplicationFactor(), db.config.getDesiredRemoteLogs(), db.config.getRemoteTLogPolicy(), id_used, true, remoteDC) : remote_tlogs, ProcessClass::TLog);
if(oldRemoteTLogFit < newRemoteTLogFit) return false;
int oldRouterCount = oldTLogFit.count * std::max<int>(1, db.config.desiredLogRouterCount / std::max(1,oldTLogFit.count));
int newRouterCount = newTLogFit.count * std::max<int>(1, db.config.desiredLogRouterCount / std::max(1,newTLogFit.count));
RoleFitness oldLogRoutersFit(log_routers, ProcessClass::LogRouter);
RoleFitness newLogRoutersFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newTLogFit.count, db.config, id_used, Optional<WorkerFitnessInfo>(), true ) : log_routers, ProcessClass::LogRouter);
RoleFitness newLogRoutersFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newRouterCount, db.config, id_used, Optional<WorkerFitnessInfo>(), true ) : log_routers, ProcessClass::LogRouter);
if(oldLogRoutersFit.count < oldTLogFit.count) {
if(oldLogRoutersFit.count < oldRouterCount) {
oldLogRoutersFit.worstFit = ProcessClass::NeverAssign;
}
if(newLogRoutersFit.count < newTLogFit.count) {
if(newLogRoutersFit.count < newRouterCount) {
newLogRoutersFit.worstFit = ProcessClass::NeverAssign;
}
@ -922,14 +956,14 @@ public:
if(oldInFit.betterFitness(newInFit)) return false;
if(oldTLogFit > newTLogFit || oldInFit > newInFit || oldSatelliteTLogFit > newSatelliteTLogFit || oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit) {
if(oldTLogFit > newTLogFit || oldInFit > newInFit || (oldSatelliteFallback && !newSatelliteFallback) || oldSatelliteTLogFit > newSatelliteTLogFit || oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit) {
TraceEvent("BetterMasterExists", id).detail("OldMasterFit", oldMasterFit).detail("NewMasterFit", mworker.fitness)
.detail("OldTLogFitC", oldTLogFit.count).detail("NewTLogFitC", newTLogFit.count)
.detail("OldTLogWorstFitT", oldTLogFit.worstFit).detail("NewTLogWorstFitT", newTLogFit.worstFit)
.detail("OldTLogBestFitT", oldTLogFit.bestFit).detail("NewTLogBestFitT", newTLogFit.bestFit)
.detail("OldInFitW", oldInFit.worstFit).detail("NewInFitW", newInFit.worstFit)
.detail("OldInFitB", oldInFit.bestFit).detail("NewInFitB", newInFit.bestFit)
.detail("OldInFitC", oldInFit.count).detail("NewInFitC", newInFit.count);
.detail("OldTLogFit", oldTLogFit.toString()).detail("NewTLogFit", newTLogFit.toString())
.detail("OldInFit", oldInFit.toString()).detail("NewInFit", newInFit.toString())
.detail("OldSatelliteFit", oldSatelliteTLogFit.toString()).detail("NewSatelliteFit", newSatelliteTLogFit.toString())
.detail("OldRemoteFit", oldRemoteTLogFit.toString()).detail("NewRemoteFit", newRemoteTLogFit.toString())
.detail("OldRouterFit", oldLogRoutersFit.toString()).detail("NewRouterFit", newLogRoutersFit.toString())
.detail("OldSatelliteFallback", oldSatelliteFallback).detail("NewSatelliteFallback", newSatelliteFallback);
return true;
}
@ -945,24 +979,25 @@ public:
Optional<Standalone<StringRef>> clusterControllerProcessId;
Optional<Standalone<StringRef>> clusterControllerDcId;
AsyncVar<Optional<vector<Optional<Key>>>> desiredDcIds; //desired DC priorities
AsyncVar<Optional<vector<Optional<Key>>>> changingDcIds; //current DC priorities for everyone other than the cluster controller process
AsyncVar<Optional<vector<Optional<Key>>>> changedDcIds; //current DC priority for the cluster controller process
AsyncVar<std::pair<bool,Optional<vector<Optional<Key>>>>> changingDcIds; //current DC priorities to change first, and whether that is the cluster controller
AsyncVar<std::pair<bool,Optional<vector<Optional<Key>>>>> changedDcIds; //current DC priorities to change second, and whether the cluster controller has been changed
UID id;
std::vector<RecruitFromConfigurationRequest> outstandingRecruitmentRequests;
std::vector<RecruitRemoteFromConfigurationRequest> outstandingRemoteRecruitmentRequests;
std::vector<std::pair<RecruitStorageRequest, double>> outstandingStorageRequests;
ActorCollection ac;
UpdateWorkerList updateWorkerList;
Future<Void> betterMasterExistsChecker;
Future<Void> outstandingRequestChecker;
DBInfo db;
Database cx;
double startTime;
Optional<double> remoteStartTime;
Version datacenterVersionDifference;
bool versionDifferenceUpdated;
explicit ClusterControllerData( ClusterControllerFullInterface ccInterface )
: id(ccInterface.id()), ac(false), betterMasterExistsChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false)
: id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false)
{
auto serverInfo = db.serverInfo->get();
serverInfo.id = g_random->randomUniqueID();
@ -1214,31 +1249,34 @@ void checkOutstandingStorageRequests( ClusterControllerData* self ) {
}
}
ACTOR Future<Void> doCheckOutstandingMasterRequests( ClusterControllerData* self ) {
Void _ = wait( delay(SERVER_KNOBS->CHECK_BETTER_MASTER_INTERVAL) );
if (self->betterMasterExists()) {
if (!self->db.forceMasterFailure.isSet()) {
self->db.forceMasterFailure.send( Void() );
TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().master.id());
// Actor that runs one delayed pass over the cluster controller's outstanding work.
// It waits CHECK_OUTSTANDING_INTERVAL, then re-checks the outstanding recruitment,
// remote recruitment and storage requests, calls checkRecoveryStalled(), and finally
// signals forceMasterFailure if betterMasterExists() returns true.
// Any Error thrown inside the try block is caught below and not rethrown, so the
// actor always finishes by returning Void().
ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
try {
// Defer the checks by the configured interval before doing any work.
Void _ = wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) );
checkOutstandingRecruitmentRequests( self );
checkOutstandingRemoteRecruitmentRequests( self );
checkOutstandingStorageRequests( self );
self->checkRecoveryStalled();
if (self->betterMasterExists()) {
// Only send on forceMasterFailure once; a promise that is already set is left alone.
if (!self->db.forceMasterFailure.isSet()) {
self->db.forceMasterFailure.send( Void() );
TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().master.id());
}
}
} catch( Error &e ) {
// operation_failed and no_more_servers are dropped silently; every other error code
// is traced at SevError. Nothing is rethrown from this handler.
// NOTE(review): if Flow delivers an actor-cancellation error through this catch, it
// would also be logged at SevError and swallowed — confirm that is intended.
if(e.code() != error_code_operation_failed && e.code() != error_code_no_more_servers) {
TraceEvent(SevError, "CheckOutstandingError").error(e);
}
}
return Void();
}
void checkOutstandingMasterRequests( ClusterControllerData* self ) {
self->checkRecoveryStalled();
if( !self->betterMasterExistsChecker.isReady() )
void checkOutstandingRequests( ClusterControllerData* self ) {
if( !self->outstandingRequestChecker.isReady() )
return;
self->betterMasterExistsChecker = doCheckOutstandingMasterRequests(self);
}
void checkOutstandingRequests( ClusterControllerData* self ) {
checkOutstandingRecruitmentRequests( self );
checkOutstandingRemoteRecruitmentRequests( self );
checkOutstandingStorageRequests( self );
checkOutstandingMasterRequests( self );
self->outstandingRequestChecker = doCheckOutstandingRequests(self);
}
ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Standalone<StringRef>> processID ) {
@ -1252,7 +1290,7 @@ ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Stan
if(watcher != cluster->id_worker.end()) {
watcher->second.reboots--;
if( watcher->second.reboots < 2 )
checkOutstandingMasterRequests( cluster );
checkOutstandingRequests( cluster );
}
return Void();
@ -1481,7 +1519,7 @@ ACTOR Future<Void> clusterRecruitRemoteFromConfiguration( ClusterControllerData*
req.reply.send( self->findRemoteWorkersForConfiguration( req ) );
return Void();
} catch (Error& e) {
if (e.code() == error_code_no_more_servers && now() - self->startTime >= SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) {
if (e.code() == error_code_no_more_servers && self->remoteStartTime.present() && now() - self->remoteStartTime.get() >= SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) {
self->outstandingRemoteRecruitmentRequests.push_back( req );
TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e);
return Void();
@ -1575,7 +1613,7 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c
self->db.serverInfo->set( dbInfo );
}
checkOutstandingMasterRequests(self);
checkOutstandingRequests(self);
}
void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
@ -1589,11 +1627,21 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
if ( w.address() == g_network->getLocalAddress() ) {
self->clusterControllerProcessId = w.locality.processId();
self->clusterControllerDcId = w.locality.dcId();
if(self->changedDcIds.get().present()) {
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changedDcIds.get().get() );
if(self->changingDcIds.get().first) {
if(self->changingDcIds.get().second.present()) {
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changingDcIds.get().second.get() );
}
} else if(self->changedDcIds.get().second.present()) {
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changedDcIds.get().second.get() );
}
} else {
if(!self->changingDcIds.get().first) {
if(self->changingDcIds.get().second.present()) {
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changingDcIds.get().second.get() );
}
} else if(self->changedDcIds.get().second.present()) {
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changedDcIds.get().second.get() );
}
} else if(self->changingDcIds.get().present()) {
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changingDcIds.get().get() );
}
// Check process class and exclusive property
@ -1900,38 +1948,91 @@ ACTOR Future<Void> updatedChangingDatacenters(ClusterControllerData *self) {
//do not change the cluster controller until all the processes have had a chance to register
Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) );
loop {
self->changingDcIds.set(self->desiredDcIds.get());
if(self->changingDcIds.get().present()) {
for ( auto& it : self->id_worker ) {
uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( it.second.interf.locality.dcId(), self->changingDcIds.get().get() );
if ( it.first != self->clusterControllerProcessId && it.second.priorityInfo.dcFitness != fitness ) {
it.second.priorityInfo.dcFitness = fitness;
if(!it.second.reply.isSet()) {
it.second.reply.send( RegisterWorkerReply( it.second.processClass, it.second.priorityInfo ) );
state Future<Void> onChange = self->desiredDcIds.onChange();
if(!self->desiredDcIds.get().present()) {
self->changingDcIds.set(std::make_pair(false,self->desiredDcIds.get()));
} else {
auto& worker = self->id_worker[self->clusterControllerProcessId];
uint8_t newFitness = ClusterControllerPriorityInfo::calculateDCFitness( worker.interf.locality.dcId(), self->desiredDcIds.get().get() );
self->changingDcIds.set(std::make_pair(worker.priorityInfo.dcFitness > newFitness,self->desiredDcIds.get()));
if ( worker.priorityInfo.dcFitness > newFitness ) {
worker.priorityInfo.dcFitness = newFitness;
if(!worker.reply.isSet()) {
worker.reply.send( RegisterWorkerReply( worker.processClass, worker.priorityInfo ) );
}
} else {
state int currentFit = ProcessClass::BestFit;
while(currentFit <= ProcessClass::NeverAssign) {
bool updated = false;
for ( auto& it : self->id_worker ) {
if( ( !it.second.priorityInfo.isExcluded && it.second.priorityInfo.processClassFitness == currentFit ) || currentFit == ProcessClass::NeverAssign ) {
uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( it.second.interf.locality.dcId(), self->changingDcIds.get().second.get() );
if ( it.first != self->clusterControllerProcessId && it.second.priorityInfo.dcFitness != fitness ) {
updated = true;
it.second.priorityInfo.dcFitness = fitness;
if(!it.second.reply.isSet()) {
it.second.reply.send( RegisterWorkerReply( it.second.processClass, it.second.priorityInfo ) );
}
}
}
}
if(updated && currentFit < ProcessClass::NeverAssign) {
Void _ = wait( delay(SERVER_KNOBS->CC_CLASS_DELAY) );
}
currentFit++;
}
}
}
Void _ = wait(self->desiredDcIds.onChange());
Void _ = wait(onChange);
}
}
ACTOR Future<Void> updatedChangedDatacenters(ClusterControllerData *self) {
state Future<Void> changeDelay = delay(SERVER_KNOBS->CC_CHANGE_DELAY);
state Future<Void> onChange = self->changingDcIds.onChange();
loop {
choose {
when( Void _ = wait(self->changingDcIds.onChange()) ) { changeDelay = delay(SERVER_KNOBS->CC_CHANGE_DELAY); }
when( Void _ = wait(onChange) ) {
changeDelay = delay(SERVER_KNOBS->CC_CHANGE_DELAY);
onChange = self->changingDcIds.onChange();
}
when( Void _ = wait(changeDelay) ) {
changeDelay = Never();
self->changedDcIds.set(self->changingDcIds.get());
if(self->changedDcIds.get().present()) {
auto& worker = self->id_worker[self->clusterControllerProcessId];
uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( worker.interf.locality.dcId(), self->changedDcIds.get().get() );
if ( worker.priorityInfo.dcFitness != fitness ) {
worker.priorityInfo.dcFitness = fitness;
if(!worker.reply.isSet()) {
worker.reply.send( RegisterWorkerReply( worker.processClass, worker.priorityInfo ) );
onChange = self->changingDcIds.onChange();
self->changedDcIds.set(self->changingDcIds.get());
if(self->changedDcIds.get().second.present()) {
if( !self->changedDcIds.get().first ) {
auto& worker = self->id_worker[self->clusterControllerProcessId];
uint8_t newFitness = ClusterControllerPriorityInfo::calculateDCFitness( worker.interf.locality.dcId(), self->changedDcIds.get().second.get() );
if( worker.priorityInfo.dcFitness != newFitness ) {
worker.priorityInfo.dcFitness = newFitness;
if(!worker.reply.isSet()) {
worker.reply.send( RegisterWorkerReply( worker.processClass, worker.priorityInfo ) );
}
}
} else {
state int currentFit = ProcessClass::BestFit;
while(currentFit <= ProcessClass::NeverAssign) {
bool updated = false;
for ( auto& it : self->id_worker ) {
if( ( !it.second.priorityInfo.isExcluded && it.second.priorityInfo.processClassFitness == currentFit ) || currentFit == ProcessClass::NeverAssign ) {
uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( it.second.interf.locality.dcId(), self->changedDcIds.get().second.get() );
if ( it.first != self->clusterControllerProcessId && it.second.priorityInfo.dcFitness != fitness ) {
updated = true;
it.second.priorityInfo.dcFitness = fitness;
if(!it.second.reply.isSet()) {
it.second.reply.send( RegisterWorkerReply( it.second.processClass, it.second.priorityInfo ) );
}
}
}
}
if(updated && currentFit < ProcessClass::NeverAssign) {
Void _ = wait( delay(SERVER_KNOBS->CC_CLASS_DELAY) );
}
currentFit++;
}
}
}
@ -1941,6 +2042,7 @@ ACTOR Future<Void> updatedChangedDatacenters(ClusterControllerData *self) {
}
ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *self ) {
state double lastLogTime = 0;
loop {
self->versionDifferenceUpdated = false;
if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.usableRegions == 1) {
@ -1977,12 +2079,12 @@ ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *sel
Void _ = wait(self->db.serverInfo->onChange());
continue;
}
state Future<Void> onChange = self->db.serverInfo->onChange();
loop {
state Future<TLogQueuingMetricsReply> primaryMetrics = primaryLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() );
state Future<TLogQueuingMetricsReply> remoteMetrics = remoteLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() );
Void _ = wait( ( success(primaryMetrics) && success(remoteMetrics) ) || onChange );
if(onChange.isReady()) {
break;
@ -1990,6 +2092,10 @@ ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *sel
self->versionDifferenceUpdated = true;
self->datacenterVersionDifference = primaryMetrics.get().v - remoteMetrics.get().v;
if(now() - lastLogTime > SERVER_KNOBS->CLUSTER_CONTROLLER_LOGGING_DELAY) {
lastLogTime = now();
TraceEvent("DatacenterVersionDifference", self->id).detail("Difference", self->datacenterVersionDifference);
}
Void _ = wait( delay(SERVER_KNOBS->VERSION_LAG_METRIC_INTERVAL) || onChange );
if(onChange.isReady()) {

View File

@ -89,10 +89,13 @@ struct RecruitFromConfigurationReply {
vector<WorkerInterface> storageServers;
vector<WorkerInterface> oldLogRouters;
Optional<Key> dcId;
bool satelliteFallback;
RecruitFromConfigurationReply() : satelliteFallback(false) {}
template <class Ar>
void serialize( Ar& ar ) {
ar & tLogs & satelliteTLogs & proxies & resolvers & storageServers & oldLogRouters & dcId;
ar & tLogs & satelliteTLogs & proxies & resolvers & storageServers & oldLogRouters & dcId & satelliteFallback;
}
};

View File

@ -404,7 +404,7 @@ ACTOR Future<Reference<InitialDataDistribution>> getInitialDataDistribution( Dat
// for each range
for(int i = 0; i < keyServers.size() - 1; i++) {
ShardInfo info( keyServers[i].key );
DDShardInfo info( keyServers[i].key );
decodeKeyServersValue( keyServers[i].value, src, dest );
if(remoteDcIds.size()) {
auto srcIter = team_cache.find(src);
@ -471,7 +471,7 @@ ACTOR Future<Reference<InitialDataDistribution>> getInitialDataDistribution( Dat
}
// a dummy shard at the end with no keys or servers makes life easier for trackInitialShards()
result->shards.push_back( ShardInfo(allKeys.end) );
result->shards.push_back( DDShardInfo(allKeys.end) );
return result;
}

View File

@ -175,7 +175,8 @@ private:
void insert(Team team, KeyRange const& range);
};
struct ShardInfo {
// DDShardInfo is so named to avoid link-time name collision with ShardInfo within the StorageServer
struct DDShardInfo {
Key key;
vector<UID> primarySrc;
vector<UID> remoteSrc;
@ -183,7 +184,7 @@ struct ShardInfo {
vector<UID> remoteDest;
bool hasDest;
explicit ShardInfo(Key key) : key(key), hasDest(false) {}
explicit DDShardInfo(Key key) : key(key), hasDest(false) {}
};
struct InitialDataDistribution : ReferenceCounted<InitialDataDistribution> {
@ -191,7 +192,7 @@ struct InitialDataDistribution : ReferenceCounted<InitialDataDistribution> {
vector<std::pair<StorageServerInterface, ProcessClass>> allServers;
std::set<vector<UID>> primaryTeams;
std::set<vector<UID>> remoteTeams;
vector<ShardInfo> shards;
vector<DDShardInfo> shards;
};
Future<Void> dataDistribution(

View File

@ -248,7 +248,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR, 10.0 );
// Master Server
init( MASTER_LOGGING_DELAY, 1.0 );
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistribution)
// by delay()ing for this amount of time between accepted batches of TransactionRequests.
init( COMMIT_SLEEP_TIME, 0.0001 ); if( randomize && BUGGIFY ) COMMIT_SLEEP_TIME = 0;
@ -267,6 +266,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( LAST_LIMITED_RATIO, 0.6 );
//Cluster Controller
init( CLUSTER_CONTROLLER_LOGGING_DELAY, 5.0 );
init( MASTER_FAILURE_REACTION_TIME, 0.4 ); if( randomize && BUGGIFY ) MASTER_FAILURE_REACTION_TIME = 10.0;
init( MASTER_FAILURE_SLOPE_DURING_RECOVERY, 0.1 );
init( WORKER_COORDINATION_PING_DELAY, 60 );
@ -274,19 +274,21 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( SHUTDOWN_TIMEOUT, 600 ); if( randomize && BUGGIFY ) SHUTDOWN_TIMEOUT = 60.0;
init( MASTER_SPIN_DELAY, 1.0 ); if( randomize && BUGGIFY ) MASTER_SPIN_DELAY = 10.0;
init( CC_CHANGE_DELAY, 0.1 );
init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 0.1 );
init( CC_CLASS_DELAY, 0.01 );
init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 1.0 );
init( WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY, 5.0 );
init( ATTEMPT_RECRUITMENT_DELAY, 0.035 );
init( WORKER_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) WORKER_FAILURE_TIME = 10.0;
init( CHECK_BETTER_MASTER_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) CHECK_BETTER_MASTER_INTERVAL = 0.001;
init( CHECK_OUTSTANDING_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) CHECK_OUTSTANDING_INTERVAL = 0.001;
init( VERSION_LAG_METRIC_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) VERSION_LAG_METRIC_INTERVAL = 10.0;
init( MAX_VERSION_DIFFERENCE, 20 * VERSIONS_PER_SECOND );
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::GoodFit );
init( EXPECTED_TLOG_FITNESS, ProcessClass::GoodFit );
init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::GoodFit );
init( EXPECTED_PROXY_FITNESS, ProcessClass::GoodFit );
init( EXPECTED_RESOLVER_FITNESS, ProcessClass::GoodFit );
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_TLOG_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_PROXY_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_RESOLVER_FITNESS, ProcessClass::UnsetFit );
init( RECRUITMENT_TIMEOUT, 600 ); if( randomize && BUGGIFY ) RECRUITMENT_TIMEOUT = g_random->coinflip() ? 60.0 : 1.0;
init( POLICY_RATING_TESTS, 200 ); if( randomize && BUGGIFY ) POLICY_RATING_TESTS = 20;
@ -343,7 +345,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
//Storage Server
init( STORAGE_LOGGING_DELAY, 5.0 );
init( STORAGE_SERVER_POLL_METRICS_DELAY, 1.0 );
init( FUTURE_VERSION_DELAY, 1.0 ); if( randomize && BUGGIFY ) FUTURE_VERSION_DELAY = 0.001;
init( FUTURE_VERSION_DELAY, 1.0 );
init( STORAGE_LIMIT_BYTES, 500000 );
init( BUGGIFY_LIMIT_BYTES, 1000 );
init( FETCH_BLOCK_BYTES, 2e6 );

View File

@ -192,7 +192,6 @@ public:
double PROXY_SPIN_DELAY;
// Master Server
double MASTER_LOGGING_DELAY;
double COMMIT_SLEEP_TIME;
double MIN_BALANCE_TIME;
int64_t MIN_BALANCE_DIFFERENCE;
@ -208,6 +207,7 @@ public:
int64_t RESOLVER_STATE_MEMORY_LIMIT;
//Cluster Controller
double CLUSTER_CONTROLLER_LOGGING_DELAY;
double MASTER_FAILURE_REACTION_TIME;
double MASTER_FAILURE_SLOPE_DURING_RECOVERY;
int WORKER_COORDINATION_PING_DELAY;
@ -215,10 +215,12 @@ public:
double SHUTDOWN_TIMEOUT;
double MASTER_SPIN_DELAY;
double CC_CHANGE_DELAY;
double CC_CLASS_DELAY;
double WAIT_FOR_GOOD_RECRUITMENT_DELAY;
double WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY;
double ATTEMPT_RECRUITMENT_DELAY;
double WORKER_FAILURE_TIME;
double CHECK_BETTER_MASTER_INTERVAL;
double CHECK_OUTSTANDING_INTERVAL;
double INCOMPATIBLE_PEERS_LOGGING_INTERVAL;
double VERSION_LAG_METRIC_INTERVAL;
int64_t MAX_VERSION_DIFFERENCE;

View File

@ -82,7 +82,10 @@ ACTOR Future<Void> tryBecomeLeaderInternal( ServerCoordinators coordinators, Val
state bool iAmLeader = false;
state UID prevChangeID;
if( asyncPriorityInfo->get().processClassFitness > ProcessClass::UnsetFit || asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessBad || asyncPriorityInfo->get().isExcluded ) {
if(asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessBad || asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessRemote || asyncPriorityInfo->get().isExcluded) {
Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) );
} else if( asyncPriorityInfo->get().processClassFitness > ProcessClass::UnsetFit ) {
Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) );
}

View File

@ -37,17 +37,17 @@ struct LogRouterData {
struct TagData : NonCopyable, public ReferenceCounted<TagData> {
std::deque<std::pair<Version, LengthPrefixedStringRef>> version_messages;
Version popped;
Version knownCommittedVersion;
Version durableKnownCommittedVersion;
Tag tag;
TagData( Tag tag, Version popped, Version knownCommittedVersion ) : tag(tag), popped(popped), knownCommittedVersion(knownCommittedVersion) {}
TagData( Tag tag, Version popped, Version durableKnownCommittedVersion ) : tag(tag), popped(popped), durableKnownCommittedVersion(durableKnownCommittedVersion) {}
TagData(TagData&& r) noexcept(true) : version_messages(std::move(r.version_messages)), tag(r.tag), popped(r.popped), knownCommittedVersion(r.knownCommittedVersion) {}
TagData(TagData&& r) noexcept(true) : version_messages(std::move(r.version_messages)), tag(r.tag), popped(r.popped), durableKnownCommittedVersion(r.durableKnownCommittedVersion) {}
// Move assignment: takes over r's version_messages via std::move and copies the
// remaining fields (tag, popped and the known-committed version fields) by value.
// Declared noexcept(true); returns void, so assignments cannot be chained.
void operator= (TagData&& r) noexcept(true) {
version_messages = std::move(r.version_messages);
tag = r.tag;
popped = r.popped;
knownCommittedVersion = r.knownCommittedVersion;
durableKnownCommittedVersion = r.durableKnownCommittedVersion;
}
// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
@ -79,6 +79,7 @@ struct LogRouterData {
NotifiedVersion version;
NotifiedVersion minPopped;
Version startVersion;
Version minKnownCommittedVersion;
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
Tag routerTag;
bool allowPops;
@ -101,7 +102,7 @@ struct LogRouterData {
return newTagData;
}
LogRouterData(UID dbgid, InitializeLogRouterRequest req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()), version(req.startVersion-1), minPopped(req.startVersion-1), startVersion(req.startVersion), allowPops(false) {
LogRouterData(UID dbgid, InitializeLogRouterRequest req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()), version(req.startVersion-1), minPopped(req.startVersion-1), startVersion(req.startVersion), allowPops(false), minKnownCommittedVersion(0) {
//setup just enough of a logSet to be able to call getPushLocations
logSet.logServers.resize(req.tLogLocalities.size());
logSet.tLogPolicy = req.tLogPolicy;
@ -162,7 +163,7 @@ void commitMessages( LogRouterData* self, Version version, const std::vector<Tag
}
}
}
msgSize -= msg.message.size();
}
self->messageBlocks.push_back( std::make_pair(version, block) );
@ -193,6 +194,8 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
}
}
self->minKnownCommittedVersion = std::max(self->minKnownCommittedVersion, r->getMinKnownCommittedVersion());
state Version ver = 0;
state std::vector<TagsAndMessage> messages;
while (true) {
@ -306,6 +309,7 @@ ACTOR Future<Void> logRouterPeekMessages( LogRouterData* self, TLogPeekRequest r
TLogPeekReply reply;
reply.maxKnownVersion = self->version.get();
reply.minKnownCommittedVersion = self->minKnownCommittedVersion;
reply.messages = messages.toStringRef();
reply.popped = self->minPopped.get() >= self->startVersion ? self->minPopped.get() : 0;
reply.end = endVersion;
@ -318,10 +322,10 @@ ACTOR Future<Void> logRouterPeekMessages( LogRouterData* self, TLogPeekRequest r
ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
auto tagData = self->getTagData(req.tag);
if (!tagData) {
tagData = self->createTagData(req.tag, req.to, req.knownCommittedVersion);
tagData = self->createTagData(req.tag, req.to, req.durableKnownCommittedVersion);
} else if (req.to > tagData->popped) {
tagData->popped = req.to;
tagData->knownCommittedVersion = req.knownCommittedVersion;
tagData->durableKnownCommittedVersion = req.durableKnownCommittedVersion;
Void _ = wait(tagData->eraseMessagesBefore( req.to, self, TaskTLogPop ));
}
@ -330,7 +334,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
for( auto it : self->tag_data ) {
if(it) {
minPopped = std::min( it->popped, minPopped );
minKnownCommittedVersion = std::min( it->knownCommittedVersion, minKnownCommittedVersion );
minKnownCommittedVersion = std::min( it->durableKnownCommittedVersion, minKnownCommittedVersion );
}
}
@ -340,10 +344,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
}
if(self->logSystem->get() && self->allowPops) {
//The knownCommittedVersion might not be committed on the primary logs, so subtracting max_read_transaction_life_versions will ensure it is committed.
//We then need to subtract max_read_transaction_life_versions again to ensure we do not pop below the knownCommittedVersion of the primary logs.
//FIXME: if we get the true knownCommittedVersion when peeking from the primary logs we only need to subtract max_read_transaction_life_versions once.
self->logSystem->get()->pop(minKnownCommittedVersion - 2*SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, self->routerTag);
self->logSystem->get()->pop(std::min(minKnownCommittedVersion, self->minKnownCommittedVersion), self->routerTag);
}
req.reply.send(Void());
self->minPopped.set(std::max(minPopped, self->minPopped.get()));

View File

@ -76,7 +76,8 @@ public:
void populateSatelliteTagLocations(int logRouterTags, int oldLogRouterTags) {
satelliteTagLocations.clear();
satelliteTagLocations.resize(std::max(logRouterTags,oldLogRouterTags) + 1);
std::map<int,int> server_usedBest;
std::set<std::pair<int,int>> used_servers;
for(int i = 0; i < tLogLocalities.size(); i++) {
used_servers.insert(std::make_pair(0,i));
@ -109,6 +110,16 @@ public:
for(auto& entry : resultEntries) {
resultPairs.push_back(*serverMap->getObject(entry));
}
int firstBestUsed = server_usedBest[resultPairs[0].second];
for(int i = 1; i < resultPairs.size(); i++) {
int thisBestUsed = server_usedBest[resultPairs[i].second];
if(thisBestUsed < firstBestUsed) {
std::swap(resultPairs[0], resultPairs[i]);
firstBestUsed = thisBestUsed;
}
}
server_usedBest[resultPairs[0].second]++;
for(auto& res : resultPairs) {
satelliteTagLocations[team].push_back(res.second);
used_servers.erase(res);
@ -126,20 +137,31 @@ public:
}
// Checks how evenly satelliteTagLocations spreads teams over the log servers indexed by
// tLogLocalities and reports the spread in a "CheckSatelliteTagLocations" trace event.
// Two per-server tallies are built:
//   used     - how many times the server appears in any position of any team
//   usedBest - how many teams list the server as their first entry (team[0])
// NOTE(review): team[0] is read unconditionally, so every team is assumed to be non-empty — confirm.
void checkSatelliteTagLocations() {
std::vector<int> usedBest;
std::vector<int> used;
usedBest.resize(tLogLocalities.size());
used.resize(tLogLocalities.size());
for(auto team : satelliteTagLocations) {
usedBest[team[0]]++;
for(auto loc : team) {
used[loc]++;
}
}
// Minimums start at the number of teams; maximums start at zero.
int minUsedBest = satelliteTagLocations.size();
int maxUsedBest = 0;
for(auto i : usedBest) {
minUsedBest = std::min(minUsedBest, i);
maxUsedBest = std::max(maxUsedBest, i);
}
int minUsed = satelliteTagLocations.size();
int maxUsed = 0;
for(auto i : used) {
minUsed = std::min(minUsed, i);
maxUsed = std::max(maxUsed, i);
}
// A spread greater than 1 is logged at SevError in simulation and SevWarnAlways otherwise; anything else is SevInfo.
// NOTE(review): the two TraceEvent statements below look like the before/after versions of one line
// rendered together from a diff; only the second also considers and reports the usedBest spread.
TraceEvent(maxUsed - minUsed > 1 ? (g_network->isSimulated() ? SevError : SevWarnAlways) : SevInfo, "CheckSatelliteTagLocations").detail("MinUsed", minUsed).detail("MaxUsed", maxUsed);
TraceEvent(((maxUsed - minUsed > 1) || (maxUsedBest - minUsedBest > 1)) ? (g_network->isSimulated() ? SevError : SevWarnAlways) : SevInfo, "CheckSatelliteTagLocations").detail("MinUsed", minUsed).detail("MaxUsed", maxUsed).detail("MinUsedBest", minUsedBest).detail("MaxUsedBest", maxUsedBest);
}
int bestLocationFor( Tag tag ) {
@ -314,6 +336,8 @@ struct ILogSystem {
// Returns the maximum version known to have been pushed (not necessarily durably) into the log system (0 is always a possible result!)
virtual Version getMaxKnownVersion() { return 0; }
virtual Version getMinKnownCommittedVersion() = 0;
virtual void addref() = 0;
virtual void delref() = 0;
@ -358,6 +382,7 @@ struct ILogSystem {
virtual bool isExhausted();
virtual LogMessageVersion version();
virtual Version popped();
virtual Version getMinKnownCommittedVersion();
virtual void addref() {
ReferenceCounted<ServerPeekCursor>::addref();
@ -411,6 +436,7 @@ struct ILogSystem {
virtual bool isExhausted();
virtual LogMessageVersion version();
virtual Version popped();
virtual Version getMinKnownCommittedVersion();
virtual void addref() {
ReferenceCounted<MergedPeekCursor>::addref();
@ -455,6 +481,7 @@ struct ILogSystem {
virtual bool isExhausted();
virtual LogMessageVersion version();
virtual Version popped();
virtual Version getMinKnownCommittedVersion();
virtual void addref() {
ReferenceCounted<SetPeekCursor>::addref();
@ -488,6 +515,7 @@ struct ILogSystem {
virtual bool isExhausted();
virtual LogMessageVersion version();
virtual Version popped();
virtual Version getMinKnownCommittedVersion();
virtual void addref() {
ReferenceCounted<MultiCursor>::addref();
@ -516,7 +544,7 @@ struct ILogSystem {
// Never returns normally, but throws an error if the subsystem stops working
//Future<Void> push( UID bundle, int64_t seq, VectorRef<TaggedMessageRef> messages );
virtual Future<Void> push( Version prevVersion, Version version, Version knownCommittedVersion, struct LogPushData& data, Optional<UID> debugID = Optional<UID>() ) = 0;
virtual Future<Version> push( Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, struct LogPushData& data, Optional<UID> debugID = Optional<UID>() ) = 0;
// Waits for the version number of the bundle (in this epoch) to be prevVersion (i.e. for all pushes ordered earlier)
// Puts the given messages into the bundle, each with the given tags, and with message versions (version, 0) - (version, N)
// Changes the version number of the bundle to be version (unblocking the next push)
@ -535,7 +563,7 @@ struct ILogSystem {
// Same contract as peek(), but blocks until the preferred log server(s) for the given tag are available (and is correspondingly less expensive)
virtual Reference<IPeekCursor> peekLogRouter( UID dbgid, Version begin, Tag tag ) = 0;
// Same contract as peek(), but can only peek from the logs elected in the same generation.
// Same contract as peek(), but can only peek from the logs elected in the same generation.
// If the preferred log server is down, a different log from the same generation will merge results locally before sending them to the log router.
virtual void pop( Version upTo, Tag tag, Version knownCommittedVersion = 0, int8_t popLocality = tagLocalityInvalid ) = 0;
@ -548,8 +576,8 @@ struct ILogSystem {
virtual Future<Void> endEpoch() = 0;
// Ends the current epoch without starting a new one
static Reference<ILogSystem> fromServerDBInfo( UID const& dbgid, struct ServerDBInfo const& db, bool usePreviousEpochEnd = false, Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>() );
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, struct LocalityData const&, struct LogSystemConfig const&, bool excludeRemote = false, bool usePreviousEpochEnd = false, Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>() );
static Reference<ILogSystem> fromServerDBInfo( UID const& dbgid, struct ServerDBInfo const& db, bool useRecoveredAt = false, Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>() );
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, struct LocalityData const&, struct LogSystemConfig const&, bool excludeRemote = false, bool useRecoveredAt = false, Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>() );
// Constructs a new ILogSystem implementation from the given ServerDBInfo/LogSystemConfig. Might return a null reference if there isn't a fully recovered log system available.
// The caller can peek() the returned log system and can push() if it has version numbers reserved for it and prevVersions

View File

@ -156,11 +156,11 @@ struct LogSystemConfig {
int32_t expectedLogSets;
UID recruitmentID;
bool stopped;
Optional<Version> previousEpochEndVersion;
Optional<Version> recoveredAt;
LogSystemConfig() : logSystemType(0), logRouterTags(0), expectedLogSets(0), stopped(false) {}
// Returns a one-line summary: logSystemType, the number of old generations (oldTLogs.size()),
// logRouterTags, and describe(tLogs).
// NOTE(review): oldTLogs.size() is passed to a "%d" conversion; if size() yields a 64-bit
// size_t this is a format/argument mismatch — confirm against format()'s contract.
// NOTE(review): the signature line appears twice below, apparently a whitespace-only
// before/after pair from a diff rendering.
std::string toString() const {
std::string toString() const {
return format("type: %d oldGenerations: %d tags: %d %s", logSystemType, oldTLogs.size(), logRouterTags, describe(tLogs).c_str());
}
@ -217,7 +217,7 @@ struct LogSystemConfig {
bool operator == ( const LogSystemConfig& rhs ) const { return isEqual(rhs); }
// Member-by-member equality against `r` over exactly the fields named in the return expression.
// NOTE(review): two return statements are present; they differ only in the last compared member
// (previousEpochEndVersion vs. recoveredAt) and look like the old/new lines of a diff shown together.
bool isEqual(LogSystemConfig const& r) const {
return logSystemType == r.logSystemType && tLogs == r.tLogs && oldTLogs == r.oldTLogs && expectedLogSets == r.expectedLogSets && logRouterTags == r.logRouterTags && recruitmentID == r.recruitmentID && stopped == r.stopped && previousEpochEndVersion == r.previousEpochEndVersion;
return logSystemType == r.logSystemType && tLogs == r.tLogs && oldTLogs == r.oldTLogs && expectedLogSets == r.expectedLogSets && logRouterTags == r.logRouterTags && recruitmentID == r.recruitmentID && stopped == r.stopped && recoveredAt == r.recoveredAt;
}
bool isEqualIds(LogSystemConfig const& r) const {
@ -248,7 +248,7 @@ struct LogSystemConfig {
// Streams the listed members through the archive `ar` using operator&, in the order written.
// NOTE(review): the statement appears twice, differing only in the final member
// (previousEpochEndVersion vs. recoveredAt); this looks like the old/new lines of a diff shown together.
template <class Ar>
void serialize( Ar& ar ) {
ar & logSystemType & tLogs & logRouterTags & oldTLogs & expectedLogSets & recruitmentID & stopped & previousEpochEndVersion;
ar & logSystemType & tLogs & logRouterTags & oldTLogs & expectedLogSets & recruitmentID & stopped & recoveredAt;
}
};

View File

@ -26,6 +26,7 @@
ILogSystem::ServerPeekCursor::ServerPeekCursor( Reference<AsyncVar<OptionalInterface<TLogInterface>>> const& interf, Tag tag, Version begin, Version end, bool returnIfBlocked, bool parallelGetMore )
: interf(interf), tag(tag), messageVersion(begin), end(end), hasMsg(false), rd(results.arena, results.messages, Unversioned()), randomID(g_random->randomUniqueID()), poppedVersion(0), returnIfBlocked(returnIfBlocked), sequence(0), parallelGetMore(parallelGetMore) {
this->results.maxKnownVersion = 0;
this->results.minKnownCommittedVersion = 0;
//TraceEvent("SPC_Starting", randomID).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).backtrace();
}
@ -34,6 +35,7 @@ ILogSystem::ServerPeekCursor::ServerPeekCursor( TLogPeekReply const& results, Lo
{
//TraceEvent("SPC_Clone", randomID);
this->results.maxKnownVersion = 0;
this->results.minKnownCommittedVersion = 0;
if(hasMsg)
nextMessage();
@ -141,6 +143,7 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
}
loop {
state Version expectedBegin = self->messageVersion.version;
try {
while(self->futureResults.size() < SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS && self->interf->get().present()) {
self->futureResults.push_back( brokenPromiseToNever( self->interf->get().interf().peekMessages.getReply(TLogPeekRequest(self->messageVersion.version,self->tag,self->returnIfBlocked, std::make_pair(self->randomID, self->sequence++)), taskID) ) );
@ -148,6 +151,10 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
choose {
when( TLogPeekReply res = wait( self->interf->get().present() ? self->futureResults.front() : Never() ) ) {
if(res.begin.get() != expectedBegin) {
throw timed_out();
}
expectedBegin = res.end;
self->futureResults.pop_front();
self->results = res;
if(res.popped.present())
@ -254,6 +261,8 @@ bool ILogSystem::ServerPeekCursor::isExhausted() {
LogMessageVersion ILogSystem::ServerPeekCursor::version() { return messageVersion; } // Call only after nextMessage(). The sequence of the current message, or results.end if nextMessage() has returned false.
// Returns the minKnownCommittedVersion field of the currently held peek reply (`results`).
Version ILogSystem::ServerPeekCursor::getMinKnownCommittedVersion() {
	return results.minKnownCommittedVersion;
}
Version ILogSystem::ServerPeekCursor::popped() { return poppedVersion; }
ILogSystem::MergedPeekCursor::MergedPeekCursor( vector< Reference<ILogSystem::IPeekCursor> > const& serverCursors, Version begin, bool collectTags )
@ -485,6 +494,10 @@ bool ILogSystem::MergedPeekCursor::isExhausted() {
LogMessageVersion ILogSystem::MergedPeekCursor::version() { return messageVersion; }
// Forwards the query to the server cursor selected by currentCursor.
Version ILogSystem::MergedPeekCursor::getMinKnownCommittedVersion() {
	auto& activeCursor = serverCursors[currentCursor];
	return activeCursor->getMinKnownCommittedVersion();
}
Version ILogSystem::MergedPeekCursor::popped() {
Version poppedVersion = 0;
for (auto& c : serverCursors)
@ -776,6 +789,10 @@ bool ILogSystem::SetPeekCursor::isExhausted() {
LogMessageVersion ILogSystem::SetPeekCursor::version() { return messageVersion; }
// Forwards the query to the cursor at index currentCursor within the set at index currentSet.
Version ILogSystem::SetPeekCursor::getMinKnownCommittedVersion() {
	auto& activeSet = serverCursors[currentSet];
	return activeSet[currentCursor]->getMinKnownCommittedVersion();
}
Version ILogSystem::SetPeekCursor::popped() {
Version poppedVersion = 0;
for (auto& cursors : serverCursors) {
@ -858,6 +875,10 @@ LogMessageVersion ILogSystem::MultiCursor::version() {
return cursors.back()->version();
}
// Forwards the query to the last cursor in `cursors`.
Version ILogSystem::MultiCursor::getMinKnownCommittedVersion() {
	auto& lastCursor = cursors.back();
	return lastCursor->getMinKnownCommittedVersion();
}
// Returns the larger of this object's poppedVersion and the popped version reported by
// the last cursor in `cursors`.
Version ILogSystem::MultiCursor::popped() {
	const Version lastCursorPopped = cursors.back()->popped();
	return std::max(poppedVersion, lastCursorPopped);
}

View File

@ -175,7 +175,8 @@ struct ProxyCommitData {
LogSystemDiskQueueAdapter* logAdapter;
Reference<ILogSystem> logSystem;
IKeyValueStore* txnStateStore;
NotifiedVersion committedVersion; // Provided that this recovery has succeeded or will succeed, this version is fully committed (durable)
NotifiedVersion committedVersion; // Provided that this recovery has succeeded or will succeed, this version is fully committed (durable)
Version minKnownCommittedVersion; // No version smaller than this one will be used as the known committed version during recovery
Version version; // The version at which txnStateStore is up to date
Promise<Void> validState; // Set once txnStateStore and version are valid
double lastVersionTime;
@ -223,9 +224,9 @@ struct ProxyCommitData {
}
ProxyCommitData(UID dbgid, MasterInterface master, RequestStream<GetReadVersionRequest> getConsistentReadVersion, Version recoveryTransactionVersion, RequestStream<CommitTransactionRequest> commit, Reference<AsyncVar<ServerDBInfo>> db, bool firstProxy)
: dbgid(dbgid), stats(dbgid, &version, &committedVersion, &commitBatchesMemBytesCount), master(master),
: dbgid(dbgid), stats(dbgid, &version, &committedVersion, &commitBatchesMemBytesCount), master(master),
logAdapter(NULL), txnStateStore(NULL),
committedVersion(recoveryTransactionVersion), version(0),
committedVersion(recoveryTransactionVersion), version(0), minKnownCommittedVersion(0),
lastVersionTime(0), commitVersionRequestNumber(1), mostRecentProcessedRequestNumber(0),
getConsistentReadVersion(getConsistentReadVersion), commit(commit), lastCoalesceTime(0),
localCommitBatchesStarted(0), locked(false), firstProxy(firstProxy),
@ -794,8 +795,7 @@ ACTOR Future<Void> commitBatch(
if ( prevVersion && commitVersion - prevVersion < SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT/2 )
debug_advanceMaxCommittedVersion(UID(), commitVersion);
Future<Void> loggingComplete = self->logSystem->push( prevVersion, commitVersion, self->committedVersion.get(), toCommit, debugID )
|| self->committedVersion.whenAtLeast( commitVersion+1 );
Future<Version> loggingComplete = self->logSystem->push( prevVersion, commitVersion, self->committedVersion.get(), self->minKnownCommittedVersion, toCommit, debugID );
if (!forceRecovery) {
ASSERT(self->latestLocalCommitBatchLogging.get() == localBatchNumber-1);
@ -803,12 +803,25 @@ ACTOR Future<Void> commitBatch(
}
/////// Phase 4: Logging (network bound; pipelined up to MAX_READ_TRANSACTION_LIFE_VERSIONS (limited by loop above))
Void _ = wait(loggingComplete);
try {
choose {
when(Version ver = wait(loggingComplete)) {
self->minKnownCommittedVersion = std::max(self->minKnownCommittedVersion, ver);
}
when(Void _ = wait(self->committedVersion.whenAtLeast( commitVersion+1 ))) {}
}
} catch(Error &e) {
if(e.code() == error_code_broken_promise) {
throw master_tlog_failed();
}
throw;
}
Void _ = wait(yield());
self->logSystem->pop(msg.popTo, txsTag);
/////// Phase 5: Replies (CPU bound; no particular order required, though ordered execution would be best for latency)
/////// Phase 5: Replies (CPU bound; no particular order required, though ordered execution would be best for latency)
if ( prevVersion && commitVersion - prevVersion < SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT/2 )
debug_advanceMinCommittedVersion(UID(), commitVersion);
@ -826,7 +839,7 @@ ACTOR Future<Void> commitBatch(
if( commitVersion > self->committedVersion.get() ) {
self->locked = lockedAfter;
self->committedVersion.set(commitVersion);
}
}
if (forceRecovery) {
TraceEvent(SevWarn, "RestartingTxnSubsystem", self->dbgid).detail("Stage", "ProxyShutdown");
@ -1170,7 +1183,7 @@ ACTOR Future<Void> masterProxyServerCore(
state Future<Void> lastCommitComplete = Void();
state PromiseStream<Future<Void>> addActor;
state Future<Void> onError = actorCollection(addActor.getFuture());
state Future<Void> onError = transformError( actorCollection(addActor.getFuture()), broken_promise(), master_tlog_failed() );
state double lastCommit = 0;
state std::set<Sequence> txnSequences;
state Sequence maxSequence = std::numeric_limits<Sequence>::max();

View File

@ -952,6 +952,7 @@ namespace oldTLog {
TLogPeekReply reply;
reply.maxKnownVersion = logData->version.get();
reply.minKnownCommittedVersion = 0;
if(poppedVer > req.begin) {
reply.popped = poppedVer;
reply.end = poppedVer;
@ -974,6 +975,7 @@ namespace oldTLog {
} else {
sequenceData.send(reply.end);
}
reply.begin = req.begin;
}
req.reply.send( reply );

View File

@ -68,13 +68,13 @@ ACTOR Future<WorkerInterface> getMasterWorker( Database cx, Reference<AsyncVar<S
ACTOR Future<int64_t> getDataInFlight( Database cx, WorkerInterface masterWorker ) {
try {
TraceEvent("DataInFlight").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster");
Standalone<StringRef> md = wait( timeoutError(masterWorker.eventLogRequest.getReply(
TraceEventFields md = wait( timeoutError(masterWorker.eventLogRequest.getReply(
EventLogRequest( StringRef( cx->dbName.toString() + "/TotalDataInFlight" ) ) ), 1.0 ) );
int64_t dataInFlight;
sscanf(extractAttribute(md.toString(), "TotalBytes").c_str(), "%lld", &dataInFlight);
sscanf(md.getValue("TotalBytes").c_str(), "%lld", &dataInFlight);
return dataInFlight;
} catch( Error &e ) {
TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataInFlight");
TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataInFlight").error(e);
throw;
}
@ -89,13 +89,13 @@ ACTOR Future<int64_t> getDataInFlight( Database cx, Reference<AsyncVar<ServerDBI
}
//Computes the queue size for storage servers and tlogs using the bytesInput and bytesDurable attributes
// Each of the "BytesInput" and "BytesDurable" attributes is parsed as three fields
// ("%lf %lf %lld": rate, roughness, byte total). Only the two byte totals are used; the
// result is the BytesInput total minus the BytesDurable total.
// NOTE(review): the sscanf return values are not checked, so a malformed attribute would leave
// inputBytes/durableBytes uninitialized. "%lld" is also paired with int64_t* — confirm int64_t
// is long long on every supported platform.
// NOTE(review): both the Standalone<StringRef> and the TraceEventFields forms of the signature
// and of the sscanf calls appear below; this looks like old/new diff lines rendered together.
int64_t getQueueSize( Standalone<StringRef> md ) {
int64_t getQueueSize( TraceEventFields md ) {
double inputRate, durableRate;
double inputRoughness, durableRoughness;
int64_t inputBytes, durableBytes;
sscanf(extractAttribute(md.toString(), "BytesInput").c_str(), "%lf %lf %lld", &inputRate, &inputRoughness, &inputBytes);
sscanf(extractAttribute(md.toString(), "BytesDurable").c_str(), "%lf %lf %lld", &durableRate, &durableRoughness, &durableBytes);
sscanf(md.getValue("BytesInput").c_str(), "%lf %lf %lld", &inputRate, &inputRoughness, &inputBytes);
sscanf(md.getValue("BytesDurable").c_str(), "%lf %lf %lld", &durableRate, &durableRoughness, &durableBytes);
return inputBytes - durableBytes;
}
@ -110,7 +110,7 @@ ACTOR Future<int64_t> getMaxTLogQueueSize( Database cx, Reference<AsyncVar<Serve
workersMap[worker.first.address()] = worker.first;
}
state std::vector<Future<Standalone<StringRef>>> messages;
state std::vector<Future<TraceEventFields>> messages;
state std::vector<TLogInterface> tlogs = dbInfo->get().logSystemConfig.allPresentLogs();
for(int i = 0; i < tlogs.size(); i++) {
auto itr = workersMap.find(tlogs[i].address());
@ -182,7 +182,7 @@ ACTOR Future<int64_t> getMaxStorageServerQueueSize( Database cx, Reference<Async
workersMap[worker.first.address()] = worker.first;
}
state std::vector<Future<Standalone<StringRef>>> messages;
state std::vector<Future<TraceEventFields>> messages;
for(int i = 0; i < servers.size(); i++) {
auto itr = workersMap.find(servers[i].address());
if(itr == workersMap.end()) {
@ -224,17 +224,17 @@ ACTOR Future<int64_t> getDataDistributionQueueSize( Database cx, WorkerInterface
try {
TraceEvent("DataDistributionQueueSize").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster");
Standalone<StringRef> movingDataMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply(
TraceEventFields movingDataMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply(
EventLogRequest( StringRef( cx->dbName.toString() + "/MovingData") ) ), 1.0 ) );
TraceEvent("DataDistributionQueueSize").detail("Database", printable(cx->dbName)).detail("Stage", "GotString").detail("Result", printable(movingDataMessage)).detail("TrackLatest", printable( StringRef( cx->dbName.toString() + "/MovingData") ) );
TraceEvent("DataDistributionQueueSize").detail("Database", printable(cx->dbName)).detail("Stage", "GotString")/*.detail("Result", printable(movingDataMessage))*/.detail("TrackLatest", printable( StringRef( cx->dbName.toString() + "/MovingData") ) );
int64_t inQueue;
sscanf(extractAttribute(movingDataMessage.toString(), "InQueue").c_str(), "%lld", &inQueue);
sscanf(movingDataMessage.getValue("InQueue").c_str(), "%lld", &inQueue);
if(reportInFlight) {
int64_t inFlight;
sscanf(extractAttribute(movingDataMessage.toString(), "InFlight").c_str(), "%lld", &inFlight);
sscanf(movingDataMessage.getValue("InFlight").c_str(), "%lld", &inFlight);
inQueue += inFlight;
}
@ -258,10 +258,10 @@ ACTOR Future<bool> getDataDistributionActive( Database cx, WorkerInterface maste
try {
TraceEvent("DataDistributionActive").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster");
Standalone<StringRef> activeMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply(
TraceEventFields activeMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply(
EventLogRequest( StringRef( cx->dbName.toString() + "/DDTrackerStarting") ) ), 1.0 ) );
return extractAttribute(activeMessage.toString(), "State") == "Active";
return activeMessage.getValue("State") == "Active";
} catch( Error &e ) {
TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataDistributionActive");
throw;
@ -273,10 +273,10 @@ ACTOR Future<bool> getStorageServersRecruiting( Database cx, Reference<AsyncVar<
try {
TraceEvent("StorageServersRecruiting").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster");
Standalone<StringRef> recruitingMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply(
TraceEventFields recruitingMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply(
EventLogRequest( StringRef( cx->dbName.toString() + "/StorageServerRecruitment_" + dbInfo->get().master.id().toString()) ) ), 1.0 ) );
return extractAttribute(recruitingMessage.toString(), "State") == "Recruiting";
return recruitingMessage.getValue("State") == "Recruiting";
} catch( Error &e ) {
TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract StorageServersRecruiting").detail("MasterID", dbInfo->get().master.id());
throw;

View File

@ -119,6 +119,7 @@ T simulate( const T& in ) {
// Prepares the TLS options used by the simulator: certBytes is supplied as both the
// certificate data and the key data, a single peer-verification rule ("Check.Valid=0")
// is installed, and register_network() is then invoked on the options object.
static void simInitTLS(Reference<TLSOptions> tlsOptions) {
	const std::vector<std::string> verifyRules(1, "Check.Valid=0");

	tlsOptions->set_cert_data( certBytes );
	tlsOptions->set_key_data( certBytes );
	tlsOptions->set_verify_peers( verifyRules );
	tlsOptions->register_network();
}
@ -864,6 +865,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) {
ASSERT(false); // Programmer forgot to adjust cases.
}
if (g_random->random01() < 0.25) db.desiredLogRouterCount = g_random->randomInt(1,7);
if (g_random->random01() < 0.25) db.remoteDesiredTLogCount = g_random->randomInt(1,7);
}
@ -935,25 +937,16 @@ void setupSimulatedSystem( vector<Future<Void>> *systemActors, std::string baseF
g_simulator.remoteTLogPolicy = simconfig.db.getRemoteTLogPolicy();
g_simulator.usableRegions = simconfig.db.usableRegions;
if(simconfig.db.regions.size() == 2) {
g_simulator.primaryDcId = simconfig.db.regions[0].dcId;
g_simulator.remoteDcId = simconfig.db.regions[1].dcId;
g_simulator.hasSatelliteReplication = simconfig.db.regions[0].satelliteTLogReplicationFactor > 0;
ASSERT((!simconfig.db.regions[0].satelliteTLogPolicy && !simconfig.db.regions[1].satelliteTLogPolicy) || simconfig.db.regions[0].satelliteTLogPolicy->info() == simconfig.db.regions[1].satelliteTLogPolicy->info());
g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicy;
g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorum;
for(auto s : simconfig.db.regions[0].satellites) {
g_simulator.primarySatelliteDcIds.push_back(s.dcId);
}
for(auto s : simconfig.db.regions[1].satellites) {
g_simulator.remoteSatelliteDcIds.push_back(s.dcId);
}
} else if(simconfig.db.regions.size() == 1) {
if(simconfig.db.regions.size() > 0) {
g_simulator.primaryDcId = simconfig.db.regions[0].dcId;
g_simulator.hasSatelliteReplication = simconfig.db.regions[0].satelliteTLogReplicationFactor > 0;
g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicy;
g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorum;
if(simconfig.db.regions[0].satelliteTLogUsableDcsFallback > 0) {
g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicyFallback;
g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorumFallback;
} else {
g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicy;
g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorum;
}
for(auto s : simconfig.db.regions[0].satellites) {
g_simulator.primarySatelliteDcIds.push_back(s.dcId);
@ -962,7 +955,16 @@ void setupSimulatedSystem( vector<Future<Void>> *systemActors, std::string baseF
g_simulator.hasSatelliteReplication = false;
g_simulator.satelliteTLogWriteAntiQuorum = 0;
}
if(simconfig.db.regions.size() == 2) {
g_simulator.remoteDcId = simconfig.db.regions[1].dcId;
ASSERT((!simconfig.db.regions[0].satelliteTLogPolicy && !simconfig.db.regions[1].satelliteTLogPolicy) || simconfig.db.regions[0].satelliteTLogPolicy->info() == simconfig.db.regions[1].satelliteTLogPolicy->info());
for(auto s : simconfig.db.regions[1].satellites) {
g_simulator.remoteSatelliteDcIds.push_back(s.dcId);
}
}
ASSERT(g_simulator.storagePolicy && g_simulator.tLogPolicy);
ASSERT(!g_simulator.hasSatelliteReplication || g_simulator.satelliteTLogPolicy);
TraceEvent("SimulatorConfig").detail("ConfigString", printable(StringRef(startingConfigString)));

View File

@ -75,137 +75,28 @@ extern int limitReasonEnd;
extern const char* limitReasonName[];
extern const char* limitReasonDesc[];
// Returns -1 if it fails to find a quoted string at the start of xml; returns the position beyond the close quote
// If decoded is not NULL, writes the decoded attribute value there
// `decoded` (when non-NULL) is cleared up front, so on failure it may hold a partial value.
// Only the entities &amp;, &lt; and &quot; are recognised; any other '&' sequence is a failure (-1).
// NOTE(review): the `struct WorkerEvents` line inside the body is unrelated to this function and
// looks like a line from the new side of a diff interleaved with this (old) function.
int decodeQuotedAttributeValue( StringRef xml, std::string* decoded ) {
if (decoded) decoded->clear();
if (!xml.size() || xml[0] != '"') return -1;
int pos = 1;
struct WorkerEvents : std::map<NetworkAddress, TraceEventFields> {};
loop {
if (pos == xml.size()) return -1; // No closing quote
if (xml[pos]=='"') { pos++; break; } // Success
uint8_t out = xml[pos];
if (xml[pos] == '&') {
// Each branch emits the decoded character and skips the full length of the entity.
if (xml.substr(pos).startsWith(LiteralStringRef("&amp;"))) { out = '&'; pos += 5; }
else if (xml.substr(pos).startsWith(LiteralStringRef("&lt;"))) { out = '<'; pos += 4; }
else if (xml.substr(pos).startsWith(LiteralStringRef("&quot;"))) { out = '"'; pos += 6; }
else return -1;
} else
pos++;
if (decoded) decoded->push_back(out);
}
return pos;
}
// Looks up attribute `attributeToExtract` in the single XML element held in `expanded` and
// writes its entity-decoded value to `ret`.
// Returns true on success. Returns false if the input does not start with '<', the attribute
// is not present, or a value cannot be decoded. When the attribute is found but its value is
// missing or malformed, `ret` is left empty.
// Input that ends in the middle of an attribute name (no '=' or '>' before the end) is rejected
// instead of reading past the end of the buffer.
bool tryExtractAttribute( StringRef expanded, StringRef attributeToExtract, std::string& ret ) {
	// This is only expected to parse the XML that Trace.cpp actually generates; we haven't looked at the standard to even find out what it doesn't try to do

	const int size = expanded.size();
	int pos = 0;

	// Consume '<'
	if (pos == size || expanded[pos] != '<') return false;
	pos++;

	// Consume tag name
	while (pos != size && expanded[pos] != ' ' && expanded[pos] != '/' && expanded[pos] != '>') pos++;

	while (pos != size && expanded[pos] != '>' && expanded[pos] != '/') {
		// Consume whitespace
		while (pos != size && expanded[pos] == ' ') pos++;

		// We should be looking at an attribute or the end of the string; find '=' at the end of the attribute, if any
		int eq_or_end = pos;
		while (eq_or_end != size && expanded[eq_or_end]!='=' && expanded[eq_or_end]!='>') eq_or_end++;

		const bool isWanted = expanded.substr(pos, eq_or_end-pos) == attributeToExtract;

		if (eq_or_end == size) {
			// The input ended before any '=' or '>': there is no value to decode, and
			// substr(eq_or_end+1) would start beyond the end of the buffer.
			if (isWanted) ret.clear();
			return false;
		}

		if (isWanted) {
			// Found the attribute we want; decode the value
			int end = decodeQuotedAttributeValue(expanded.substr(eq_or_end+1), &ret);
			if (end<0) { ret.clear(); return false; }
			return true;
		}

		// We don't want this attribute, but we need to skip over its value
		// It looks like this *could* just be a scan for '"' characters
		int end = decodeQuotedAttributeValue(expanded.substr(eq_or_end+1), NULL);
		if (end<0) return false;
		pos = (eq_or_end+1)+end;
	}
	return false;
}
// Returns the decoded value of `attributeToExtract` within `expanded`.
// Throws attribute_not_found when tryExtractAttribute() reports failure.
std::string extractAttribute( StringRef expanded, StringRef attributeToExtract ) {
	std::string value;
	const bool found = tryExtractAttribute(expanded, attributeToExtract, value);
	if (found) {
		return value;
	}
	throw attribute_not_found();
}
std::string extractAttribute( std::string const& expanded, std::string const& attributeToExtract ) {
return extractAttribute(StringRef(expanded), StringRef(attributeToExtract));
}
// Fixed-input checks for tryExtractAttribute().
TEST_CASE("fdbserver/Status/extractAttribute/basic") {
std::string a;
// &quot; entities in a value are decoded to literal double quotes.
ASSERT( tryExtractAttribute(
LiteralStringRef("<Foo A=\"&quot;a&quot;\" B=\"\" />"),
LiteralStringRef("A"),
a) && a == LiteralStringRef("\"a\""));
// A backslash inside a value is an ordinary character, not an escape; an earlier attribute is skipped to reach B.
ASSERT( tryExtractAttribute(
LiteralStringRef("<Foo A=\"&quot;a&quot;\" B=\"\\\" />"),
LiteralStringRef("B"),
a) && a == LiteralStringRef("\\") );
// An attribute in the middle of a long event line is found.
ASSERT( tryExtractAttribute(
LiteralStringRef("<Event Severity=\"10\" Time=\"1415124565.129695\" Type=\"ProgramStart\" Machine=\"10.0.0.85:6863\" ID=\"0000000000000000\" RandomSeed=\"-2044671207\" SourceVersion=\"675cd9579467+ tip\" Version=\"3.0.0-PRERELEASE\" PackageName=\"3.0\" DataFolder=\"\" ConnectionString=\"circus:81060aa85f0a5b5b@10.0.0.5:4000,10.0.0.17:4000,10.0.0.78:4000,10.0.0.162:4000,10.0.0.182:4000\" ActualTime=\"1415124565\" CommandLine=\"fdbserver -r multitest -p auto:6863 -f /tmp/circus/testspec.txt --num_testers 24 --logdir /tmp/circus/multitest\" BuggifyEnabled=\"0\"/>"),
LiteralStringRef("Version"),
a) && a == LiteralStringRef("3.0.0-PRERELEASE") );
// A name that is only a suffix of real attribute names ("ersion") must not match.
ASSERT( !tryExtractAttribute(
LiteralStringRef("<Event Severity=\"10\" Time=\"1415124565.129695\" Type=\"ProgramStart\" Machine=\"10.0.0.85:6863\" ID=\"0000000000000000\" RandomSeed=\"-2044671207\" SourceVersion=\"675cd9579467+ tip\" Version=\"3.0.0-PRERELEASE\" PackageName=\"3.0\" DataFolder=\"\" ConnectionString=\"circus:81060aa85f0a5b5b@10.0.0.5:4000,10.0.0.17:4000,10.0.0.78:4000,10.0.0.162:4000,10.0.0.182:4000\" ActualTime=\"1415124565\" CommandLine=\"fdbserver -r multitest -p auto:6863 -f /tmp/circus/testspec.txt --num_testers 24 --logdir /tmp/circus/multitest\" BuggifyEnabled=\"0\"/>"),
LiteralStringRef("ersion"),
a) );
return Void();
}
// Robustness check for tryExtractAttribute(): 100000 iterations, each of which overwrites one
// randomly chosen character of a known-good event line with a character drawn from `" =q0\&`
// and then runs the parser. The parse result is deliberately ignored.
TEST_CASE("fdbserver/Status/extractAttribute/fuzz") {
// This is just looking for anything that crashes or infinite loops
std::string out;
for(int i=0; i<100000; i++)
{
// A fresh copy of the line is built every iteration, so each run has exactly one mutation.
std::string s = "<Event Severity=\"10\" Time=\"1415124565.129695\" Type=\"Program &quot;Start&quot;\" Machine=\"10.0.0.85:6863\" ID=\"0000000000000000\" RandomSeed=\"-2044671207\" SourceVersion=\"675cd9579467+ tip\" Version=\"3.0.0-PRERELEASE\" PackageName=\"3.0\" DataFolder=\"\" ConnectionString=\"circus:81060aa85f0a5b5b@10.0.0.5:4000,10.0.0.17:4000,10.0.0.78:4000,10.0.0.162:4000,10.0.0.182:4000\" ActualTime=\"1415124565\" CommandLine=\"fdbserver -r multitest -p auto:6863 -f /tmp/circus/testspec.txt --num_testers 24 --logdir /tmp/circus/multitest\" BuggifyEnabled=\"0\"/>";
s[ g_random->randomInt(0, s.size()) ] = g_random->randomChoice(LiteralStringRef("\" =q0\\&"));
tryExtractAttribute(s, LiteralStringRef("Version"), out);
}
return Void();
}
struct WorkerEvents : std::map<NetworkAddress, std::string> {};
// Sends an EventLogRequest to `worker` (for `eventName`, or a default-constructed request when
// eventName is empty) and waits for the reply with a 2.0 timeout passed to timeoutError().
// Yields the reply on success and an empty Optional on any error, except that
// actor_cancelled is rethrown.
// NOTE(review): std::string/Standalone<StringRef> and TraceEventFields variants of the signature,
// the eventTrace declaration and each return appear side by side; these look like the old/new
// lines of a diff rendered together.
ACTOR static Future< Optional<std::string> > latestEventOnWorker(WorkerInterface worker, std::string eventName) {
ACTOR static Future< Optional<TraceEventFields> > latestEventOnWorker(WorkerInterface worker, std::string eventName) {
try {
EventLogRequest req = eventName.size() > 0 ? EventLogRequest(Standalone<StringRef>(eventName)) : EventLogRequest();
// errorOr() turns a failure (including the timeout) into an error value instead of a throw.
ErrorOr<Standalone<StringRef>> eventTrace = wait( errorOr(timeoutError(worker.eventLogRequest.getReply(req), 2.0)));
ErrorOr<TraceEventFields> eventTrace = wait( errorOr(timeoutError(worker.eventLogRequest.getReply(req), 2.0)));
if (eventTrace.isError()){
return Optional<std::string>();
return Optional<TraceEventFields>();
}
return eventTrace.get().toString();
return eventTrace.get();
}
catch (Error &e){
// Cancellation must propagate; every other error is reported as "no event".
if (e.code() == error_code_actor_cancelled)
throw;
return Optional<std::string>();
return Optional<TraceEventFields>();
}
}
ACTOR static Future< Optional< std::pair<WorkerEvents, std::set<std::string>> > > latestEventOnWorkers(std::vector<std::pair<WorkerInterface, ProcessClass>> workers, std::string eventName) {
try {
state vector<Future<ErrorOr<Standalone<StringRef>>>> eventTraces;
state vector<Future<ErrorOr<TraceEventFields>>> eventTraces;
for (int c = 0; c < workers.size(); c++) {
EventLogRequest req = eventName.size() > 0 ? EventLogRequest(Standalone<StringRef>(eventName)) : EventLogRequest();
eventTraces.push_back(errorOr(timeoutError(workers[c].first.eventLogRequest.getReply(req), 2.0)));
@ -217,13 +108,13 @@ ACTOR static Future< Optional< std::pair<WorkerEvents, std::set<std::string>> >
WorkerEvents results;
for (int i = 0; i < eventTraces.size(); i++) {
ErrorOr<Standalone<StringRef>> v = eventTraces[i].get();
const ErrorOr<TraceEventFields>& v = eventTraces[i].get();
if (v.isError()){
failed.insert(workers[i].first.address().toString());
results[workers[i].first.address()] = "";
results[workers[i].first.address()] = TraceEventFields();
}
else {
results[workers[i].first.address()] = v.get().toString();
results[workers[i].first.address()] = v.get();
}
}
@ -342,21 +233,21 @@ static StatusObject getLocalityInfo(const LocalityData& locality) {
return localityObj;
}
static StatusObject getError(std::string error) {
static StatusObject getError(const TraceEventFields& errorFields) {
StatusObject statusObj;
try {
if (error.size()) {
double time = atof(extractAttribute(error, "Time").c_str());
if (errorFields.size()) {
double time = atof(errorFields.getValue("Time").c_str());
statusObj["time"] = time;
statusObj["raw_log_message"] = error;
statusObj["raw_log_message"] = errorFields.toString();
std::string type = extractAttribute(error, "Type");
std::string type = errorFields.getValue("Type");
statusObj["type"] = type;
std::string description = type;
std::string errorName;
if (tryExtractAttribute(error, LiteralStringRef("Error"), errorName)) {
if(errorFields.tryGetValue("Error", errorName)) {
statusObj["name"] = errorName;
description += ": " + errorName;
}
@ -374,7 +265,7 @@ static StatusObject getError(std::string error) {
}
}
catch (Error &e){
TraceEvent(SevError, "StatusGetErrorError").error(e).detail("RawError", error);
TraceEvent(SevError, "StatusGetErrorError").error(e).detail("RawError", errorFields.toString());
}
return statusObj;
}
@ -385,7 +276,7 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector<std::pair
int failed = 0;
// map from machine networkAddress to datacenter ID
WorkerEvents dcIds;
std::map<NetworkAddress, std::string> dcIds;
std::map<NetworkAddress, LocalityData> locality;
for (auto worker : workers){
@ -401,12 +292,12 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector<std::pair
}
StatusObject statusObj; // Represents the status for a machine
std::string event = it->second;
const TraceEventFields& event = it->second;
try {
std::string address = toIPString(it->first.ip);
// We will use the "physical" calculated machine ID here to limit exposure to machineID repurposing
std::string machineId = extractAttribute(event, "MachineID");
std::string machineId = event.getValue("MachineID");
// If this machine ID does not already exist in the machineMap, add it
if (!machineMap.count(machineId)) {
@ -424,23 +315,23 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector<std::pair
StatusObject memoryObj;
metric = parseDouble(extractAttribute(event, "TotalMemory"));
metric = parseDouble(event.getValue("TotalMemory"));
memoryObj["total_bytes"] = metric;
metric = parseDouble(extractAttribute(event, "CommittedMemory"));
metric = parseDouble(event.getValue("CommittedMemory"));
memoryObj["committed_bytes"] = metric;
metric = parseDouble(extractAttribute(event, "AvailableMemory"));
metric = parseDouble(event.getValue("AvailableMemory"));
memoryObj["free_bytes"] = metric;
statusObj["memory"] = memoryObj;
StatusObject cpuObj;
metric = parseDouble(extractAttribute(event, "CPUSeconds"));
metric = parseDouble(event.getValue("CPUSeconds"));
double cpu_seconds = metric;
metric = parseDouble(extractAttribute(event, "Elapsed"));
metric = parseDouble(event.getValue("Elapsed"));
double elapsed = metric;
if (elapsed > 0){
@ -451,17 +342,17 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector<std::pair
StatusObject networkObj;
metric = parseDouble(extractAttribute(event, "MbpsSent"));
metric = parseDouble(event.getValue("MbpsSent"));
StatusObject megabits_sent;
megabits_sent["hz"] = metric;
networkObj["megabits_sent"] = megabits_sent;
metric = parseDouble(extractAttribute(event, "MbpsReceived"));
metric = parseDouble(event.getValue("MbpsReceived"));
StatusObject megabits_received;
megabits_received["hz"] = metric;
networkObj["megabits_received"] = megabits_received;
metric = parseDouble(extractAttribute(event, "RetransSegs"));
metric = parseDouble(event.getValue("RetransSegs"));
StatusObject retransSegsObj;
if (elapsed > 0){
retransSegsObj["hz"] = metric / elapsed;
@ -512,50 +403,62 @@ struct RolesInfo {
obj["role"] = role;
return roles.insert( make_pair(address, obj ))->second;
}
// Builds the status entry for a storage server role and records it under the server's address.
//
// role           - role name stored in the entry's "role" field.
// iface          - storage server interface; supplies the entry's "id" and the address it is filed under.
// metrics        - fields of the server's metrics trace event; each status value is read with getValue().
// maxTLogVersion - largest version seen on any tlog; used to refine the reported lag only when > 0.
//
// Returns a reference to the StatusObject stored in `roles`.
//
// If a metric is missing (attribute_not_found), the entry is still inserted with whatever fields were
// filled in before the missing one; any other error is rethrown.
StatusObject& addRole(std::string const& role, StorageServerInterface& iface, TraceEventFields const& metrics, Version maxTLogVersion) {
	StatusObject obj;
	obj["id"] = iface.id().shortString();
	obj["role"] = role;
	try {
		obj["stored_bytes"] = parseInt64(metrics.getValue("BytesStored"));
		obj["kvstore_used_bytes"] = parseInt64(metrics.getValue("KvstoreBytesUsed"));
		obj["kvstore_free_bytes"] = parseInt64(metrics.getValue("KvstoreBytesFree"));
		obj["kvstore_available_bytes"] = parseInt64(metrics.getValue("KvstoreBytesAvailable"));
		obj["kvstore_total_bytes"] = parseInt64(metrics.getValue("KvstoreBytesTotal"));
		obj["input_bytes"] = parseCounter(metrics.getValue("BytesInput"));
		obj["durable_bytes"] = parseCounter(metrics.getValue("BytesDurable"));
		obj["query_queue_max"] = parseInt(metrics.getValue("QueryQueueMax"));
		obj["finished_queries"] = parseCounter(metrics.getValue("FinishedQueries"));

		Version version = parseInt64(metrics.getValue("Version"));
		obj["data_version"] = version;

		int64_t versionLag = parseInt64(metrics.getValue("VersionLag"));
		if(maxTLogVersion > 0) {
			// It's possible that the storage server hasn't talked to the logs recently, in which case it may not be aware of how far behind it is.
			// To account for that, we also compute the version difference between each storage server and the tlog with the largest version.
			//
			// Because this data is only logged periodically, this difference will likely be an overestimate for the lag. We subtract off the logging interval
			// in order to make this estimate a bounded underestimate instead.
			versionLag = std::max<int64_t>(versionLag, maxTLogVersion - version - SERVER_KNOBS->STORAGE_LOGGING_DELAY * SERVER_KNOBS->VERSIONS_PER_SECOND);
		}

		// The lag is reported both in versions and converted to seconds via VERSIONS_PER_SECOND.
		StatusObject dataLag;
		dataLag["versions"] = versionLag;
		dataLag["seconds"] = versionLag / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;

		obj["data_lag"] = dataLag;
	} catch (Error& e) {
		// A metrics event that lacks some attribute yields a partial entry rather than a failure.
		if(e.code() != error_code_attribute_not_found)
			throw;
	}
	return roles.insert( make_pair(iface.address(), obj ))->second;
}
StatusObject& addRole(std::string const& role, TLogInterface& iface, std::string const& metrics) {
StatusObject& addRole(std::string const& role, TLogInterface& iface, TraceEventFields const& metrics) {
StatusObject obj;
obj["id"] = iface.id().shortString();
obj["role"] = role;
try {
obj["kvstore_used_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesUsed"));
obj["kvstore_free_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesFree"));
obj["kvstore_available_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesAvailable"));
obj["kvstore_total_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesTotal"));
obj["queue_disk_used_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesUsed"));
obj["queue_disk_free_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesFree"));
obj["queue_disk_available_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesAvailable"));
obj["queue_disk_total_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesTotal"));
obj["input_bytes"] = parseCounter(extractAttribute(metrics, "BytesInput"));
obj["durable_bytes"] = parseCounter(extractAttribute(metrics, "BytesDurable"));
obj["data_version"] = parseInt64(extractAttribute(metrics, "Version"));
obj["kvstore_used_bytes"] = parseInt64(metrics.getValue("KvstoreBytesUsed"));
obj["kvstore_free_bytes"] = parseInt64(metrics.getValue("KvstoreBytesFree"));
obj["kvstore_available_bytes"] = parseInt64(metrics.getValue("KvstoreBytesAvailable"));
obj["kvstore_total_bytes"] = parseInt64(metrics.getValue("KvstoreBytesTotal"));
obj["queue_disk_used_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesUsed"));
obj["queue_disk_free_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesFree"));
obj["queue_disk_available_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesAvailable"));
obj["queue_disk_total_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesTotal"));
obj["input_bytes"] = parseCounter(metrics.getValue("BytesInput"));
obj["durable_bytes"] = parseCounter(metrics.getValue("BytesDurable"));
obj["data_version"] = parseInt64(metrics.getValue("Version"));
} catch (Error& e) {
if(e.code() != error_code_attribute_not_found)
throw e;
@ -586,8 +489,8 @@ ACTOR static Future<StatusObject> processStatusFetcher(
WorkerEvents traceFileOpenErrors,
WorkerEvents programStarts,
std::map<std::string, StatusObject> processIssues,
vector<std::pair<StorageServerInterface, std::string>> storageServers,
vector<std::pair<TLogInterface, std::string>> tLogs,
vector<std::pair<StorageServerInterface, TraceEventFields>> storageServers,
vector<std::pair<TLogInterface, TraceEventFields>> tLogs,
Database cx,
Optional<DatabaseConfiguration> configuration,
std::set<std::string> *incomplete_reasons) {
@ -604,10 +507,10 @@ ACTOR static Future<StatusObject> processStatusFetcher(
Void _ = wait(yield());
if (traceFileErrorsItr->second.size()){
try {
// Have event string, parse it and turn it into a message object describing the trace file opening error
std::string event = traceFileErrorsItr->second;
std::string fileName = extractAttribute(event, "Filename");
StatusObject msgObj = makeMessage("file_open_error", format("Could not open file '%s' (%s).", fileName.c_str(), extractAttribute(event, "Error").c_str()).c_str());
// Have event fields, parse it and turn it into a message object describing the trace file opening error
const TraceEventFields& event = traceFileErrorsItr->second;
std::string fileName = event.getValue("Filename");
StatusObject msgObj = makeMessage("file_open_error", format("Could not open file '%s' (%s).", fileName.c_str(), event.getValue("Error").c_str()).c_str());
msgObj["file_name"] = fileName;
// Map the address of the worker to the error message object
@ -626,11 +529,11 @@ ACTOR static Future<StatusObject> processStatusFetcher(
state std::map<Optional<Standalone<StringRef>>, MachineMemoryInfo>::iterator memInfo = machineMemoryUsage.insert(std::make_pair(workerItr->first.locality.machineId(), MachineMemoryInfo())).first;
try {
ASSERT(pMetrics.count(workerItr->first.address()));
std::string processMetrics = pMetrics[workerItr->first.address()];
const TraceEventFields& processMetrics = pMetrics[workerItr->first.address()];
if(memInfo->second.valid()) {
if(processMetrics.size() > 0) {
memInfo->second.memoryUsage += parseDouble(extractAttribute(processMetrics, "Memory"));
memInfo->second.memoryUsage += parseDouble(processMetrics.getValue("Memory"));
++memInfo->second.numProcesses;
}
else
@ -656,7 +559,7 @@ ACTOR static Future<StatusObject> processStatusFetcher(
}
}
state std::vector<std::pair<TLogInterface, std::string>>::iterator log;
state std::vector<std::pair<TLogInterface, TraceEventFields>>::iterator log;
state Version maxTLogVersion = 0;
for(log = tLogs.begin(); log != tLogs.end(); ++log) {
StatusObject const& roleStatus = roles.addRole( "log", log->first, log->second );
@ -666,7 +569,7 @@ ACTOR static Future<StatusObject> processStatusFetcher(
Void _ = wait(yield());
}
state std::vector<std::pair<StorageServerInterface, std::string>>::iterator ss;
state std::vector<std::pair<StorageServerInterface, TraceEventFields>>::iterator ss;
state std::map<NetworkAddress, int64_t> ssLag;
for(ss = storageServers.begin(); ss != storageServers.end(); ++ss) {
StatusObject const& roleStatus = roles.addRole( "storage", ss->first, ss->second, maxTLogVersion );
@ -692,45 +595,45 @@ ACTOR static Future<StatusObject> processStatusFetcher(
processMap[printable(workerItr->first.locality.processId())] = StatusObject();
NetworkAddress address = workerItr->first.address();
std::string event = pMetrics[workerItr->first.address()];
const TraceEventFields& event = pMetrics[workerItr->first.address()];
statusObj["address"] = address.toString();
StatusObject memoryObj;
if (event.size() > 0) {
std::string zoneID = extractAttribute(event, "ZoneID");
std::string zoneID = event.getValue("ZoneID");
statusObj["fault_domain"] = zoneID;
std::string MachineID = extractAttribute(event, "MachineID");
std::string MachineID = event.getValue("MachineID");
statusObj["machine_id"] = MachineID;
statusObj["locality"] = getLocalityInfo(workerItr->first.locality);
statusObj["uptime_seconds"] = parseDouble(extractAttribute(event, "UptimeSeconds"));
statusObj["uptime_seconds"] = parseDouble(event.getValue("UptimeSeconds"));
metric = parseDouble(extractAttribute(event, "CPUSeconds"));
metric = parseDouble(event.getValue("CPUSeconds"));
double cpu_seconds = metric;
// rates are calculated over the last elapsed seconds
metric = parseDouble(extractAttribute(event, "Elapsed"));
metric = parseDouble(event.getValue("Elapsed"));
double elapsed = metric;
metric = parseDouble(extractAttribute(event, "DiskIdleSeconds"));
metric = parseDouble(event.getValue("DiskIdleSeconds"));
double diskIdleSeconds = metric;
metric = parseDouble(extractAttribute(event, "DiskReads"));
metric = parseDouble(event.getValue("DiskReads"));
double diskReads = metric;
metric = parseDouble(extractAttribute(event, "DiskWrites"));
metric = parseDouble(event.getValue("DiskWrites"));
double diskWrites = metric;
uint64_t diskReadsCount = parseInt64(extractAttribute(event, "DiskReadsCount"));
uint64_t diskReadsCount = parseInt64(event.getValue("DiskReadsCount"));
uint64_t diskWritesCount = parseInt64(extractAttribute(event, "DiskWritesCount"));
uint64_t diskWritesCount = parseInt64(event.getValue("DiskWritesCount"));
metric = parseDouble(extractAttribute(event, "DiskWriteSectors"));
metric = parseDouble(event.getValue("DiskWriteSectors"));
double diskWriteSectors = metric;
metric = parseDouble(extractAttribute(event, "DiskReadSectors"));
metric = parseDouble(event.getValue("DiskReadSectors"));
double diskReadSectors = metric;
StatusObject diskObj;
@ -757,39 +660,39 @@ ACTOR static Future<StatusObject> processStatusFetcher(
diskObj["writes"] = writesObj;
}
diskObj["total_bytes"] = parseInt64(extractAttribute(event, "DiskTotalBytes"));
diskObj["free_bytes"] = parseInt64(extractAttribute(event, "DiskFreeBytes"));
diskObj["total_bytes"] = parseInt64(event.getValue("DiskTotalBytes"));
diskObj["free_bytes"] = parseInt64(event.getValue("DiskFreeBytes"));
statusObj["disk"] = diskObj;
StatusObject networkObj;
networkObj["current_connections"] = parseInt64(extractAttribute(event, "CurrentConnections"));
networkObj["current_connections"] = parseInt64(event.getValue("CurrentConnections"));
StatusObject connections_established;
connections_established["hz"] = parseDouble(extractAttribute(event, "ConnectionsEstablished"));
connections_established["hz"] = parseDouble(event.getValue("ConnectionsEstablished"));
networkObj["connections_established"] = connections_established;
StatusObject connections_closed;
connections_closed["hz"] = parseDouble(extractAttribute(event, "ConnectionsClosed"));
connections_closed["hz"] = parseDouble(event.getValue("ConnectionsClosed"));
networkObj["connections_closed"] = connections_closed;
StatusObject connection_errors;
connection_errors["hz"] = parseDouble(extractAttribute(event, "ConnectionErrors"));
connection_errors["hz"] = parseDouble(event.getValue("ConnectionErrors"));
networkObj["connection_errors"] = connection_errors;
metric = parseDouble(extractAttribute(event, "MbpsSent"));
metric = parseDouble(event.getValue("MbpsSent"));
StatusObject megabits_sent;
megabits_sent["hz"] = metric;
networkObj["megabits_sent"] = megabits_sent;
metric = parseDouble(extractAttribute(event, "MbpsReceived"));
metric = parseDouble(event.getValue("MbpsReceived"));
StatusObject megabits_received;
megabits_received["hz"] = metric;
networkObj["megabits_received"] = megabits_received;
statusObj["network"] = networkObj;
metric = parseDouble(extractAttribute(event, "Memory"));
metric = parseDouble(event.getValue("Memory"));
memoryObj["used_bytes"] = metric;
metric = parseDouble(extractAttribute(event, "UnusedAllocatedMemory"));
metric = parseDouble(event.getValue("UnusedAllocatedMemory"));
memoryObj["unused_allocated_memory"] = metric;
}
@ -797,16 +700,16 @@ ACTOR static Future<StatusObject> processStatusFetcher(
auto const& psxml = programStarts.at(address);
if(psxml.size() > 0) {
int64_t memLimit = parseInt64(extractAttribute(psxml, "MemoryLimit"));
int64_t memLimit = parseInt64(psxml.getValue("MemoryLimit"));
memoryObj["limit_bytes"] = memLimit;
std::string version;
if (tryExtractAttribute(psxml, LiteralStringRef("Version"), version)) {
if (psxml.tryGetValue("Version", version)) {
statusObj["version"] = version;
}
std::string commandLine;
if (tryExtractAttribute(psxml, LiteralStringRef("CommandLine"), commandLine)) {
if (psxml.tryGetValue("CommandLine", commandLine)) {
statusObj["command_line"] = commandLine;
}
}
@ -815,7 +718,7 @@ ACTOR static Future<StatusObject> processStatusFetcher(
// if this process address is in the machine metrics
if (mMetrics.count(address) && mMetrics[address].size()){
double availableMemory;
availableMemory = parseDouble(extractAttribute(mMetrics[address], "AvailableMemory"));
availableMemory = parseDouble(mMetrics[address].getValue("AvailableMemory"));
auto machineMemInfo = machineMemoryUsage[workerItr->first.locality.machineId()];
if (machineMemInfo.valid()) {
@ -918,8 +821,8 @@ ACTOR static Future<StatusObject> recoveryStateStatusFetcher(std::pair<WorkerInt
state StatusObject message;
try {
Standalone<StringRef> md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0) );
state int mStatusCode = parseInt( extractAttribute(md, LiteralStringRef("StatusCode")) );
TraceEventFields md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0) );
state int mStatusCode = parseInt( md.getValue("StatusCode") );
if (mStatusCode < 0 || mStatusCode >= RecoveryStatus::END)
throw attribute_not_found();
@ -927,9 +830,9 @@ ACTOR static Future<StatusObject> recoveryStateStatusFetcher(std::pair<WorkerInt
// Add additional metadata for certain statuses
if (mStatusCode == RecoveryStatus::recruiting_transaction_servers) {
int requiredLogs = atoi( extractAttribute(md, LiteralStringRef("RequiredTLogs")).c_str() );
int requiredProxies = atoi( extractAttribute(md, LiteralStringRef("RequiredProxies")).c_str() );
int requiredResolvers = atoi( extractAttribute(md, LiteralStringRef("RequiredResolvers")).c_str() );
int requiredLogs = atoi( md.getValue("RequiredTLogs").c_str() );
int requiredProxies = atoi( md.getValue("RequiredProxies").c_str() );
int requiredResolvers = atoi( md.getValue("RequiredResolvers").c_str() );
//int requiredProcesses = std::max(requiredLogs, std::max(requiredResolvers, requiredProxies));
//int requiredMachines = std::max(requiredLogs, 1);
@ -937,7 +840,7 @@ ACTOR static Future<StatusObject> recoveryStateStatusFetcher(std::pair<WorkerInt
message["required_proxies"] = requiredProxies;
message["required_resolvers"] = requiredResolvers;
} else if (mStatusCode == RecoveryStatus::locking_old_transaction_servers) {
message["missing_logs"] = extractAttribute(md, LiteralStringRef("MissingIDs")).c_str();
message["missing_logs"] = md.getValue("MissingIDs").c_str();
}
// TODO: time_in_recovery: 0.5
// time_in_state: 0.1
@ -1165,32 +1068,32 @@ ACTOR static Future<StatusObject> dataStatusFetcher(std::pair<WorkerInterface, P
state StatusObject statusObjData;
try {
std::vector<Future<Standalone<StringRef>>> futures;
std::vector<Future<TraceEventFields>> futures;
// TODO: Should this be serial?
futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStarting"))), 1.0));
futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStats"))), 1.0));
std::vector<Standalone<StringRef>> dataInfo = wait(getAll(futures));
std::vector<TraceEventFields> dataInfo = wait(getAll(futures));
Standalone<StringRef> startingStats = dataInfo[0];
state Standalone<StringRef> dataStats = dataInfo[1];
TraceEventFields startingStats = dataInfo[0];
state TraceEventFields dataStats = dataInfo[1];
if (startingStats.size() && extractAttribute(startingStats, LiteralStringRef("State")) != "Active") {
if (startingStats.size() && startingStats.getValue("State") != "Active") {
stateSectionObj["name"] = "initializing";
stateSectionObj["description"] = "(Re)initializing automatic data distribution";
}
else {
state Standalone<StringRef> md = wait(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/MovingData"))), 1.0));
state TraceEventFields md = wait(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/MovingData"))), 1.0));
// If we have a MovingData message, parse it.
if (md.size())
{
int64_t partitionsInQueue = parseInt64(extractAttribute(md, LiteralStringRef("InQueue")));
int64_t partitionsInFlight = parseInt64(extractAttribute(md, LiteralStringRef("InFlight")));
int64_t averagePartitionSize = parseInt64(extractAttribute(md, LiteralStringRef("AverageShardSize")));
int64_t totalBytesWritten = parseInt64(extractAttribute(md, LiteralStringRef("BytesWritten")));
int highestPriority = parseInt(extractAttribute(md, LiteralStringRef("HighestPriority")));
int64_t partitionsInQueue = parseInt64(md.getValue("InQueue"));
int64_t partitionsInFlight = parseInt64(md.getValue("InFlight"));
int64_t averagePartitionSize = parseInt64(md.getValue("AverageShardSize"));
int64_t totalBytesWritten = parseInt64(md.getValue("BytesWritten"));
int highestPriority = parseInt(md.getValue("HighestPriority"));
if( averagePartitionSize >= 0 ) {
StatusObject moving_data;
@ -1253,9 +1156,9 @@ ACTOR static Future<StatusObject> dataStatusFetcher(std::pair<WorkerInterface, P
if (dataStats.size())
{
int64_t totalDBBytes = parseInt64(extractAttribute(dataStats, LiteralStringRef("TotalSizeBytes")));
int64_t totalDBBytes = parseInt64(dataStats.getValue("TotalSizeBytes"));
statusObjData["total_kv_size_bytes"] = totalDBBytes;
int shards = parseInt(extractAttribute(dataStats, LiteralStringRef("Shards")));
int shards = parseInt(dataStats.getValue("Shards"));
statusObjData["partitions_count"] = shards;
}
@ -1287,30 +1190,30 @@ namespace std
}
// Fetches, for every server in `servers`, the latest trace event named "<server id><suffix>" from the
// worker registered at the server's address, and pairs each server with the fields of that event.
//
// servers         - interfaces to collect metrics for; each must provide address() and id().
// address_workers - worker interface per network address. It is indexed with operator[], so an address
//                   that is not present yields a default-constructed WorkerInterface.
// suffix          - appended to the server id to form the event name (callers in this file pass
//                   "/StorageMetrics" and "/TLogMetrics").
//
// Returns one entry per server, in the same order as `servers`. A server whose event could not be
// retrieved is paired with a default-constructed TraceEventFields.
ACTOR template <class iface>
static Future<vector<std::pair<iface, TraceEventFields>>> getServerMetrics(vector<iface> servers, std::unordered_map<NetworkAddress, WorkerInterface> address_workers, std::string suffix) {
	state vector<Future<Optional<TraceEventFields>>> futures;
	for (auto s : servers) {
		futures.push_back(latestEventOnWorker(address_workers[s.address()], s.id().toString() + suffix));
	}

	// All requests are issued before waiting, so they are outstanding concurrently.
	Void _ = wait(waitForAll(futures));

	vector<std::pair<iface, TraceEventFields>> results;
	results.reserve(servers.size());
	for (int i = 0; i < servers.size(); i++) {
		results.push_back(std::make_pair(servers[i], futures[i].get().present() ? futures[i].get().get() : TraceEventFields()));
	}
	return results;
}
// Looks up the storage servers via getStorageServers(cx, true), wrapped in timeoutError(..., 5.0)
// (presumably seconds), and pairs each one with the fields of its "<id>/StorageMetrics" trace event
// as collected by getServerMetrics().
//
// cx              - database handle used to enumerate the storage servers.
// address_workers - worker interface per network address, forwarded to getServerMetrics().
//
// An error from the server lookup, including the timeout, propagates to the caller.
ACTOR static Future<vector<std::pair<StorageServerInterface, TraceEventFields>>> getStorageServersAndMetrics(Database cx, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
	vector<StorageServerInterface> servers = wait(timeoutError(getStorageServers(cx, true), 5.0));
	vector<std::pair<StorageServerInterface, TraceEventFields>> results = wait(getServerMetrics(servers, address_workers, "/StorageMetrics"));
	return results;
}
// Takes the logs returned by logSystemConfig.allPresentLogs() on the current ServerDBInfo and pairs each
// one with the fields of its "<id>/TLogMetrics" trace event as collected by getServerMetrics().
//
// db              - source of the current ServerDBInfo; read once with get(), not waited on for changes.
// address_workers - worker interface per network address, forwarded to getServerMetrics().
ACTOR static Future<vector<std::pair<TLogInterface, TraceEventFields>>> getTLogsAndMetrics(Reference<AsyncVar<struct ServerDBInfo>> db, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
	vector<TLogInterface> servers = db->get().logSystemConfig.allPresentLogs();
	vector<std::pair<TLogInterface, TraceEventFields>> results = wait(getServerMetrics(servers, address_workers, "/TLogMetrics"));
	return results;
}
@ -1334,6 +1237,7 @@ static int getExtraTLogEligibleMachines(vector<std::pair<WorkerInterface, Proces
int extraTlogEligibleMachines = std::numeric_limits<int>::max();
for(auto& region : configuration.regions) {
extraTlogEligibleMachines = std::min<int>( extraTlogEligibleMachines, dcId_machine[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) ) );
//FIXME: does not take into account fallback satellite policies
if(region.satelliteTLogReplicationFactor > 0) {
int totalSatelliteEligible = 0;
for(auto& sat : region.satellites) {
@ -1346,7 +1250,7 @@ static int getExtraTLogEligibleMachines(vector<std::pair<WorkerInterface, Proces
}
ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struct ServerDBInfo>> db, vector<std::pair<WorkerInterface, ProcessClass>> workers, std::pair<WorkerInterface, ProcessClass> mWorker,
std::string dbName, StatusObject *qos, StatusObject *data_overlay, std::set<std::string> *incomplete_reasons, Future<ErrorOr<vector<std::pair<StorageServerInterface, std::string>>>> storageServerFuture)
std::string dbName, StatusObject *qos, StatusObject *data_overlay, std::set<std::string> *incomplete_reasons, Future<ErrorOr<vector<std::pair<StorageServerInterface, TraceEventFields>>>> storageServerFuture)
{
state StatusObject statusObj;
state StatusObject operationsObj;
@ -1355,7 +1259,7 @@ ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struc
// Writes and conflicts
try {
vector<Future<Standalone<StringRef>>> proxyStatFutures;
vector<Future<TraceEventFields>> proxyStatFutures;
std::map<NetworkAddress, std::pair<WorkerInterface, ProcessClass>> workersMap;
for (auto w : workers) {
workersMap[w.first.address()] = w;
@ -1367,16 +1271,16 @@ ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struc
else
throw all_alternatives_failed(); // We need data from all proxies for this result to be trustworthy
}
vector<Standalone<StringRef>> proxyStats = wait(getAll(proxyStatFutures));
vector<TraceEventFields> proxyStats = wait(getAll(proxyStatFutures));
StatusObject mutations=makeCounter(), mutationBytes=makeCounter(), txnConflicts=makeCounter(), txnStartOut=makeCounter(), txnCommitOutSuccess=makeCounter();
for (auto &ps : proxyStats) {
mutations = addCounters( mutations, parseCounter(extractAttribute(ps, LiteralStringRef("Mutations"))) );
mutationBytes = addCounters( mutationBytes, parseCounter(extractAttribute(ps, LiteralStringRef("MutationBytes"))) );
txnConflicts = addCounters( txnConflicts, parseCounter(extractAttribute(ps, LiteralStringRef("TxnConflicts"))) );
txnStartOut = addCounters( txnStartOut, parseCounter(extractAttribute(ps, LiteralStringRef("TxnStartOut"))) );
txnCommitOutSuccess = addCounters( txnCommitOutSuccess, parseCounter(extractAttribute(ps, LiteralStringRef("TxnCommitOutSuccess"))) );
mutations = addCounters( mutations, parseCounter(ps.getValue("Mutations")) );
mutationBytes = addCounters( mutationBytes, parseCounter(ps.getValue("MutationBytes")) );
txnConflicts = addCounters( txnConflicts, parseCounter(ps.getValue("TxnConflicts")) );
txnStartOut = addCounters( txnStartOut, parseCounter(ps.getValue("TxnStartOut")) );
txnCommitOutSuccess = addCounters( txnCommitOutSuccess, parseCounter(ps.getValue("TxnCommitOutSuccess")) );
}
operationsObj["writes"] = mutations;
@ -1397,19 +1301,19 @@ ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struc
// Transactions
try {
Standalone<StringRef> md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest(StringRef(dbName+"/RkUpdate") ) ), 1.0) );
double tpsLimit = parseDouble(extractAttribute(md, LiteralStringRef("TPSLimit")));
double transPerSec = parseDouble(extractAttribute(md, LiteralStringRef("ReleasedTPS")));
int ssCount = parseInt(extractAttribute(md, LiteralStringRef("StorageServers")));
int tlogCount = parseInt(extractAttribute(md, LiteralStringRef("TLogs")));
int64_t worstFreeSpaceStorageServer = parseInt64(extractAttribute(md, LiteralStringRef("WorstFreeSpaceStorageServer")));
int64_t worstFreeSpaceTLog = parseInt64(extractAttribute(md, LiteralStringRef("WorstFreeSpaceTLog")));
int64_t worstStorageServerQueue = parseInt64(extractAttribute(md, LiteralStringRef("WorstStorageServerQueue")));
int64_t limitingStorageServerQueue = parseInt64(extractAttribute(md, LiteralStringRef("LimitingStorageServerQueue")));
int64_t worstTLogQueue = parseInt64(extractAttribute(md, LiteralStringRef("WorstTLogQueue")));
int64_t totalDiskUsageBytes = parseInt64(extractAttribute(md, LiteralStringRef("TotalDiskUsageBytes")));
int64_t worstVersionLag = parseInt64(extractAttribute(md, LiteralStringRef("WorstStorageServerVersionLag")));
int64_t limitingVersionLag = parseInt64(extractAttribute(md, LiteralStringRef("LimitingStorageServerVersionLag")));
TraceEventFields md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest(StringRef(dbName+"/RkUpdate") ) ), 1.0) );
double tpsLimit = parseDouble(md.getValue("TPSLimit"));
double transPerSec = parseDouble(md.getValue("ReleasedTPS"));
int ssCount = parseInt(md.getValue("StorageServers"));
int tlogCount = parseInt(md.getValue("TLogs"));
int64_t worstFreeSpaceStorageServer = parseInt64(md.getValue("WorstFreeSpaceStorageServer"));
int64_t worstFreeSpaceTLog = parseInt64(md.getValue("WorstFreeSpaceTLog"));
int64_t worstStorageServerQueue = parseInt64(md.getValue("WorstStorageServerQueue"));
int64_t limitingStorageServerQueue = parseInt64(md.getValue("LimitingStorageServerQueue"));
int64_t worstTLogQueue = parseInt64(md.getValue("WorstTLogQueue"));
int64_t totalDiskUsageBytes = parseInt64(md.getValue("TotalDiskUsageBytes"));
int64_t worstVersionLag = parseInt64(md.getValue("WorstStorageServerVersionLag"));
int64_t limitingVersionLag = parseInt64(md.getValue("LimitingStorageServerVersionLag"));
(*data_overlay)["total_disk_used_bytes"] = totalDiskUsageBytes;
if(ssCount > 0) {
@ -1428,13 +1332,13 @@ ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struc
(*qos)["transactions_per_second_limit"] = tpsLimit;
(*qos)["released_transactions_per_second"] = transPerSec;
int reason = parseInt(extractAttribute(md, LiteralStringRef("Reason")));
int reason = parseInt(md.getValue("Reason"));
StatusObject perfLimit;
if (transPerSec > tpsLimit * 0.8) {
// If reason is known, set qos.performance_limited_by, otherwise omit
if (reason >= 0 && reason < limitReasonEnd) {
perfLimit = makeMessage(limitReasonName[reason], limitReasonDesc[reason]);
std::string reason_server_id = extractAttribute(md, LiteralStringRef("ReasonServerID"));
std::string reason_server_id = md.getValue("ReasonServerID");
if (!reason_server_id.empty())
perfLimit["reason_server_id"] = reason_server_id;
}
@ -1455,7 +1359,7 @@ ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struc
// Reads
try {
ErrorOr<vector<std::pair<StorageServerInterface, std::string>>> storageServers = wait(storageServerFuture);
ErrorOr<vector<std::pair<StorageServerInterface, TraceEventFields>>> storageServers = wait(storageServerFuture);
if(!storageServers.present()) {
throw storageServers.getError();
}
@ -1465,9 +1369,9 @@ ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struc
StatusObject readBytes = makeCounter();
for(auto &ss : storageServers.get()) {
reads = addCounters(reads, parseCounter(extractAttribute(ss.second, LiteralStringRef("FinishedQueries"))));
readKeys = addCounters(readKeys, parseCounter(extractAttribute(ss.second, LiteralStringRef("RowsQueried"))));
readBytes = addCounters(readBytes, parseCounter(extractAttribute(ss.second, LiteralStringRef("BytesQueried"))));
reads = addCounters(reads, parseCounter(ss.second.getValue("FinishedQueries")));
readKeys = addCounters(readKeys, parseCounter(ss.second.getValue("RowsQueried")));
readBytes = addCounters(readBytes, parseCounter(ss.second.getValue("BytesQueried")));
}
operationsObj["reads"] = reads;
@ -1827,8 +1731,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
}
state std::map<std::string, StatusObject> processIssues = getProcessIssuesAsMessages(workerIssues);
state vector<std::pair<StorageServerInterface, std::string>> storageServers;
state vector<std::pair<TLogInterface, std::string>> tLogs;
state vector<std::pair<StorageServerInterface, TraceEventFields>> storageServers;
state vector<std::pair<TLogInterface, TraceEventFields>> tLogs;
state StatusObject qos;
state StatusObject data_overlay;
@ -1863,8 +1767,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
state std::unordered_map<NetworkAddress, WorkerInterface> address_workers;
for (auto worker : workers)
address_workers[worker.first.address()] = worker.first;
state Future<ErrorOr<vector<std::pair<StorageServerInterface, std::string>>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers));
state Future<ErrorOr<vector<std::pair<TLogInterface, std::string>>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers));
state Future<ErrorOr<vector<std::pair<StorageServerInterface, TraceEventFields>>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers));
state Future<ErrorOr<vector<std::pair<TLogInterface, TraceEventFields>>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers));
state int minReplicasRemaining = -1;
std::vector<Future<StatusObject>> futures2;
@ -1915,7 +1819,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
}
// Need storage servers now for processStatusFetcher() below.
ErrorOr<vector<std::pair<StorageServerInterface, std::string>>> _storageServers = wait(storageServerFuture);
ErrorOr<vector<std::pair<StorageServerInterface, TraceEventFields>>> _storageServers = wait(storageServerFuture);
if (_storageServers.present()) {
storageServers = _storageServers.get();
}
@ -1923,7 +1827,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
messages.push_back(makeMessage("storage_servers_error", "Timed out trying to retrieve storage servers."));
// ...also tlogs
ErrorOr<vector<std::pair<TLogInterface, std::string>>> _tLogs = wait(tLogFuture);
ErrorOr<vector<std::pair<TLogInterface, TraceEventFields>>> _tLogs = wait(tLogFuture);
if (_tLogs.present()) {
tLogs = _tLogs.get();
}

View File

@ -30,7 +30,6 @@
typedef std::map< NetworkAddress, std::pair<std::string,UID> > ProcessIssuesMap;
typedef std::map< NetworkAddress, Standalone<VectorRef<ClientVersionRef>> > ClientVersionMap;
std::string extractAttribute( std::string const& expanded, std::string const& attributeToExtract );
Future<StatusReply> clusterGetStatus( Reference<AsyncVar<struct ServerDBInfo>> const& db, Database const& cx, vector<std::pair<WorkerInterface, ProcessClass>> const& workers,
ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map<NetworkAddress, std::string> const& traceLogGroupMap,
ServerCoordinators const& coordinators, std::vector<NetworkAddress> const& incompatibleConnections, Version const& datacenterVersionDifference );

View File

@ -127,7 +127,7 @@ struct VerUpdateRef {
VerUpdateRef( Arena& to, const VerUpdateRef& from ) : version(from.version), mutations( to, from.mutations ), isPrivateData( from.isPrivateData ) {}
int expectedSize() const { return mutations.expectedSize(); }
template <class Ar>
template <class Ar>
void serialize( Ar& ar ) {
ar & version & mutations & isPrivateData;
}
@ -139,10 +139,12 @@ struct TLogPeekReply {
Version end;
Optional<Version> popped;
Version maxKnownVersion;
Version minKnownCommittedVersion;
Optional<Version> begin;
template <class Ar>
void serialize(Ar& ar) {
ar & arena & messages & end & popped & maxKnownVersion;
ar & arena & messages & end & popped & maxKnownVersion & minKnownCommittedVersion & begin;
}
};
@ -166,16 +168,16 @@ struct TLogPeekRequest {
// Pop request message: bundles an arena, a version `to`, a committed-version field, a tag and a
// Void reply promise. serialize() streams those fields through the archive in the order written below.
// NOTE(review): this text looks like a rendered diff hunk, so the committed-version member, the
// three-argument constructor and the serialize() body each appear twice (a `knownCommittedVersion`
// spelling followed by a `durableKnownCommittedVersion` spelling) — confirm against the real header,
// which presumably keeps only one of each pair.
struct TLogPopRequest {
Arena arena;
Version to;
Version knownCommittedVersion;
Version durableKnownCommittedVersion;
Tag tag;
ReplyPromise<Void> reply;
// Three-argument constructor: stores `to`, the committed-version argument and `tag`; `arena` and `reply` are default-constructed.
TLogPopRequest( Version to, Version knownCommittedVersion, Tag tag ) : to(to), knownCommittedVersion(knownCommittedVersion), tag(tag) {}
TLogPopRequest( Version to, Version durableKnownCommittedVersion, Tag tag ) : to(to), durableKnownCommittedVersion(durableKnownCommittedVersion), tag(tag) {}
// Default constructor leaves every member default-initialized.
TLogPopRequest() {}
template <class Ar>
void serialize(Ar& ar) {
// Field order here defines the serialized layout: arena, to, committed version, tag, reply.
ar & arena & to & knownCommittedVersion & tag & reply;
ar & arena & to & durableKnownCommittedVersion & tag & reply;
}
};
@ -198,19 +200,19 @@ struct TagMessagesRef {
// Commit request message: carries the previous and new versions, committed-version bookkeeping,
// the serialized message payload, a reply promise and an optional debug ID.
// NOTE(review): this text looks like a rendered diff hunk, so several declarations appear in
// before/after pairs: the version member list (without / with `minKnownCommittedVersion`), the
// reply promise (`ReplyPromise<Void>` / `ReplyPromise<Version>`), the full constructor and the
// serialize() body — confirm against the real header, which presumably keeps only one of each pair.
struct TLogCommitRequest {
Arena arena;
Version prevVersion, version, knownCommittedVersion;
Version prevVersion, version, knownCommittedVersion, minKnownCommittedVersion;
StringRef messages;// Each message prefixed by a 4-byte length
ReplyPromise<Void> reply;
ReplyPromise<Version> reply;
Optional<UID> debugID;
// Default constructor leaves every member default-initialized.
TLogCommitRequest() {}
// Full constructor: copies the arena handle and stores each argument in the member of the same name.
TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version knownCommittedVersion, StringRef messages, Optional<UID> debugID )
: arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), messages(messages), debugID(debugID) {}
template <class Ar>
TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, StringRef messages, Optional<UID> debugID )
: arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), minKnownCommittedVersion(minKnownCommittedVersion), messages(messages), debugID(debugID) {}
template <class Ar>
void serialize( Ar& ar ) {
// Field order here defines the serialized layout; note that `arena` is streamed after `messages` and `reply`.
ar & prevVersion & version & knownCommittedVersion & messages & reply & arena & debugID;
ar & prevVersion & version & knownCommittedVersion & minKnownCommittedVersion & messages & reply & arena & debugID;
}
};

View File

@ -360,7 +360,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData
NotifiedVersion version, queueCommittedVersion;
Version queueCommittingVersion;
Version knownCommittedVersion, durableKnownCommittedVersion;
Version knownCommittedVersion, durableKnownCommittedVersion, minKnownCommittedVersion;
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
std::vector<std::vector<Reference<TagData>>> tag_data; //tag.locality | tag.id
@ -409,7 +409,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID) : tLogData(tLogData), knownCommittedVersion(1), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0),
// These are initialized differently on init() or recovery
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
logRouterPopToVersion(0), locality(tagLocalityInvalid)
@ -990,6 +990,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
if(poppedVer > req.begin) {
TLogPeekReply rep;
rep.maxKnownVersion = logData->version.get();
rep.minKnownCommittedVersion = logData->minKnownCommittedVersion;
rep.popped = poppedVer;
rep.end = poppedVer;
@ -1006,6 +1007,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
} else {
sequenceData.send(rep.end);
}
rep.begin = req.begin;
}
req.reply.send( rep );
@ -1048,6 +1050,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
TLogPeekReply reply;
reply.maxKnownVersion = logData->version.get();
reply.minKnownCommittedVersion = logData->minKnownCommittedVersion;
reply.messages = messages.toStringRef();
reply.end = endVersion;
@ -1066,6 +1069,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
} else {
sequenceData.send(reply.end);
}
reply.begin = req.begin;
}
req.reply.send( reply );
@ -1166,7 +1170,7 @@ ACTOR Future<Void> tLogCommit(
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.BeforeWaitForVersion");
}
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion);
logData->minKnownCommittedVersion = std::max(logData->minKnownCommittedVersion, req.minKnownCommittedVersion);
Void _ = wait( logData->version.whenAtLeast( req.prevVersion ) );
@ -1199,10 +1203,12 @@ ACTOR Future<Void> tLogCommit(
TraceEvent("TLogCommit", logData->logId).detail("Version", req.version);
commitMessages(logData, req.version, req.arena, req.messages, self->bytesInput);
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion);
// Log the changes to the persistent queue, to be committed by commitQueue()
TLogQueueEntryRef qe;
qe.version = req.version;
qe.knownCommittedVersion = req.knownCommittedVersion;
qe.knownCommittedVersion = logData->knownCommittedVersion;
qe.messages = req.messages;
qe.id = logData->logId;
self->persistentQueue->push( qe, logData );
@ -1232,7 +1238,7 @@ ACTOR Future<Void> tLogCommit(
if(req.debugID.present())
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
req.reply.send( Void() );
req.reply.send( logData->durableKnownCommittedVersion );
return Void();
}
@ -1452,7 +1458,7 @@ void removeLog( TLogData* self, Reference<LogData> logData ) {
}
}
ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, std::vector<Tag> tags, Version beginVersion, Optional<Version> endVersion, bool poppedIsKnownCommitted ) {
ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, std::vector<Tag> tags, Version beginVersion, Optional<Version> endVersion, bool poppedIsKnownCommitted, bool parallelGetMore ) {
state Future<Void> dbInfoChange = Void();
state Reference<ILogSystem::IPeekCursor> r;
state Version tagAt = beginVersion;
@ -1462,14 +1468,11 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
loop {
choose {
when(Void _ = wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
if(poppedIsKnownCommitted) {
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped());
}
break;
}
when( Void _ = wait( dbInfoChange ) ) {
if( logData->logSystem->get() ) {
r = logData->logSystem->get()->peek( logData->logId, tagAt, tags );
r = logData->logSystem->get()->peek( logData->logId, tagAt, tags, parallelGetMore );
} else {
r = Reference<ILogSystem::IPeekCursor>();
}
@ -1504,6 +1507,11 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
if(endVersion.present() && ver > endVersion.get()) {
return Void();
}
if(poppedIsKnownCommitted) {
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped());
}
commitMessages(logData, ver, messages, self->bytesInput);
// Log the changes to the persistent queue, to be committed by commitQueue()
@ -1534,6 +1542,11 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
if(endVersion.present() && ver > endVersion.get()) {
return Void();
}
if(poppedIsKnownCommitted) {
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped());
}
// Log the changes to the persistent queue, to be committed by commitQueue()
TLogQueueEntryRef qe;
qe.version = ver;
@ -1591,7 +1604,7 @@ ACTOR Future<Void> tLogCore( TLogData* self, Reference<LogData> logData, TLogInt
if(!logData->isPrimary) {
std::vector<Tag> tags;
tags.push_back(logData->remoteTag);
logData->addActor.send( pullAsyncData(self, logData, tags, logData->unrecoveredBefore, Optional<Version>(), true) );
logData->addActor.send( pullAsyncData(self, logData, tags, logData->unrecoveredBefore, Optional<Version>(), true, false) );
}
try {
@ -1949,10 +1962,10 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
logData->logRouterPopToVersion = req.recoverAt;
std::vector<Tag> tags;
tags.push_back(logData->remoteTag);
Void _ = wait(pullAsyncData(self, logData, tags, logData->unrecoveredBefore, req.recoverAt, true) || logData->removed);
Void _ = wait(pullAsyncData(self, logData, tags, logData->unrecoveredBefore, req.recoverAt, true, false) || logData->removed);
} else if(!req.recoverTags.empty()) {
ASSERT(logData->unrecoveredBefore > req.knownCommittedVersion);
Void _ = wait(pullAsyncData(self, logData, req.recoverTags, req.knownCommittedVersion + 1, req.recoverAt, false) || logData->removed);
Void _ = wait(pullAsyncData(self, logData, req.recoverTags, req.knownCommittedVersion + 1, req.recoverAt, false, true) || logData->removed);
}
}

View File

@ -30,17 +30,15 @@
#include "fdbrpc/ReplicationUtils.h"
#include "RecoveryState.h"
ACTOR static Future<Void> reportTLogCommitErrors( Future<Void> commitReply, UID debugID ) {
try {
Void _ = wait(commitReply);
return Void();
} catch (Error& e) {
if (e.code() == error_code_broken_promise)
throw master_tlog_failed();
else if (e.code() != error_code_actor_cancelled && e.code() != error_code_tlog_stopped)
TraceEvent(SevError, "MasterTLogCommitRequestError", debugID).error(e);
throw;
// Waits for `f` to complete, then scans `replies` and returns the smallest version among the
// replies that have already completed without an error. Replies that are still pending or that
// failed are ignored. If no reply qualifies, the result is std::numeric_limits<Version>::max().
ACTOR Future<Version> minVersionWhenReady( Future<Void> f, std::vector<Future<Version>> replies) {
	// Nothing is inspected until the gating future has fired.
	Void _ = wait(f);

	// Start from the largest representable version so any usable reply lowers it.
	Version lowest = std::numeric_limits<Version>::max();
	for(auto& r : replies) {
		bool usable = r.isReady() && !r.isError();
		if(usable)
			lowest = std::min(lowest, r.get());
	}
	return lowest;
}
struct OldLogData {
@ -79,8 +77,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
bool remoteLogsWrittenToCoreState;
bool hasRemoteServers;
Optional<Version> epochEndVersion;
Optional<Version> previousEpochEndVersion;
Optional<Version> recoverAt;
Optional<Version> recoveredAt;
Version knownCommittedVersion;
LocalityData locality;
std::map< std::pair<UID, Tag>, std::pair<Version, Version> > outstandingPops; // For each currently running popFromLog actor, (log server #, tag)->popped version
@ -122,7 +120,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
return epochEnd( outLogSystem, dbgid, oldState, rejoins, locality );
}
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, LocalityData const& locality, LogSystemConfig const& lsConf, bool excludeRemote, bool usePreviousEpochEnd, Optional<PromiseStream<Future<Void>>> addActor ) {
static Reference<ILogSystem> fromLogSystemConfig( UID const& dbgid, LocalityData const& locality, LogSystemConfig const& lsConf, bool excludeRemote, bool useRecoveredAt, Optional<PromiseStream<Future<Void>>> addActor ) {
ASSERT( lsConf.logSystemType == 2 || (lsConf.logSystemType == 0 && !lsConf.tLogs.size()) );
//ASSERT(lsConf.epoch == epoch); //< FIXME
Reference<TagPartitionedLogSystem> logSystem( new TagPartitionedLogSystem(dbgid, locality, addActor) );
@ -132,8 +130,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSystem->logRouterTags = lsConf.logRouterTags;
logSystem->recruitmentID = lsConf.recruitmentID;
logSystem->stopped = lsConf.stopped;
if(usePreviousEpochEnd) {
logSystem->previousEpochEndVersion = lsConf.previousEpochEndVersion;
if(useRecoveredAt) {
logSystem->recoveredAt = lsConf.recoveredAt;
}
for( int i = 0; i < lsConf.tLogs.size(); i++ ) {
TLogSet const& tLogSet = lsConf.tLogs[i];
@ -391,27 +389,26 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
// Sends one TLogCommitRequest to every log server of every local log set in `tLogs` and returns a
// future that completes once each such set has reached its write quorum.
// NOTE(review): this text looks like a rendered diff hunk, so before/after variants of the
// signature, the per-server request code and the return statement are interleaved below —
// confirm against the real source, which presumably keeps only one of each pair.
virtual Future<Void> push( Version prevVersion, Version version, Version knownCommittedVersion, LogPushData& data, Optional<UID> debugID ) {
virtual Future<Version> push( Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, LogPushData& data, Optional<UID> debugID ) {
// FIXME: Randomize request order as in LegacyLogSystem?
// quorumResults holds one quorum future per local log set; allReplies holds every individual commit reply.
vector<Future<Void>> quorumResults;
vector<Future<Version>> allReplies;
// `location` selects the message buffer for each log server; it keeps counting across log sets.
int location = 0;
for(auto& it : tLogs) {
// Only local sets that actually have log servers receive commit requests.
if(it->isLocal && it->logServers.size()) {
vector<Future<Void>> tLogCommitResults;
for(int loc=0; loc< it->logServers.size(); loc++) {
Future<Void> commitMessage = reportTLogCommitErrors(
it->logServers[loc]->get().interf().commit.getReply(
TLogCommitRequest( data.getArena(), prevVersion, version, knownCommittedVersion, data.getMessages(location), debugID ), TaskTLogCommitReply ),
getDebugID());
addActor.get().send(commitMessage);
tLogCommitResults.push_back(commitMessage);
allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( data.getArena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, data.getMessages(location), debugID ), TaskTLogCommitReply ) );
// success() drops the Version payload so the reply can be tracked as a Future<Void>.
Future<Void> commitSuccess = success(allReplies.back());
addActor.get().send(commitSuccess);
tLogCommitResults.push_back(commitSuccess);
location++;
}
// This set is satisfied once all but tLogWriteAntiQuorum of its commits have completed.
quorumResults.push_back( quorum( tLogCommitResults, tLogCommitResults.size() - it->tLogWriteAntiQuorum ) );
}
}
return waitForAll(quorumResults);
// Once every set's quorum is met, the result is computed by minVersionWhenReady from the collected replies.
return minVersionWhenReady( waitForAll(quorumResults), allReplies);
}
Reference<IPeekCursor> peekAll( UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore, bool throwIfDead ) {
@ -498,7 +495,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Reference<IPeekCursor> peekRemote( UID dbgid, Version begin, Tag tag, bool parallelGetMore ) {
int bestSet = -1;
Version lastBegin = previousEpochEndVersion.present() ? previousEpochEndVersion.get() + 1 : 0;
Version lastBegin = recoveredAt.present() ? recoveredAt.get() + 1 : 0;
for(int t = 0; t < tLogs.size(); t++) {
if(tLogs[t]->isLocal) {
lastBegin = std::max(lastBegin, tLogs[t]->startVersion);
@ -757,23 +754,23 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
TraceEvent("TLogPeekLogRouterOldSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("OldEpoch", old.epochEnd).detail("PreviousEpochEndVersion", previousEpochEndVersion.present() ? previousEpochEndVersion.get() : -1).detail("FirstOld", firstOld);
TraceEvent("TLogPeekLogRouterOldSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("OldEpoch", old.epochEnd).detail("RecoveredAt", recoveredAt.present() ? recoveredAt.get() : -1).detail("FirstOld", firstOld);
//FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies across the WAN
return Reference<ILogSystem::SetPeekCursor>( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, firstOld && previousEpochEndVersion.present() ? previousEpochEndVersion.get() + 1 : old.epochEnd, true ) );
return Reference<ILogSystem::SetPeekCursor>( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, firstOld && recoveredAt.present() ? recoveredAt.get() + 1 : old.epochEnd, true ) );
}
firstOld = false;
}
return Reference<ILogSystem::ServerPeekCursor>( new ILogSystem::ServerPeekCursor( Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, false ) );
}
void popLogRouter( Version upTo, Tag tag, Version knownCommittedVersion, int8_t popLocality ) { //FIXME: do not need to pop all generations of old logs
void popLogRouter( Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality ) { //FIXME: do not need to pop all generations of old logs
if (!upTo) return;
for(auto& t : tLogs) {
if(t->locality == popLocality) {
for(auto& log : t->logRouters) {
Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first;
if (prev < upTo)
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion);
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, durableKnownCommittedVersion);
if (prev == 0) {
popActors.add( popFromLog( this, log, tag, 0.0 ) ); //Fast pop time because log routers can only hold 5 seconds of data.
}
@ -787,7 +784,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
for(auto& log : t->logRouters) {
Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first;
if (prev < upTo)
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion);
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, durableKnownCommittedVersion);
if (prev == 0)
popActors.add( popFromLog( this, log, tag, 0.0 ) );
}
@ -796,10 +793,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
virtual void pop( Version upTo, Tag tag, Version knownCommittedVersion, int8_t popLocality ) {
virtual void pop( Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality ) {
if (upTo <= 0) return;
if( tag.locality == tagLocalityRemoteLog) {
popLogRouter(upTo, tag, knownCommittedVersion, popLocality);
popLogRouter(upTo, tag, durableKnownCommittedVersion, popLocality);
return;
}
ASSERT(popLocality == tagLocalityInvalid);
@ -808,7 +805,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
for(auto& log : t->logServers) {
Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first;
if (prev < upTo)
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion);
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, durableKnownCommittedVersion);
if (prev == 0)
popActors.add( popFromLog( this, log, tag, 1.0 ) ); //< FIXME: knob
}
@ -931,7 +928,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSystemConfig.logRouterTags = logRouterTags;
logSystemConfig.recruitmentID = recruitmentID;
logSystemConfig.stopped = stopped;
logSystemConfig.previousEpochEndVersion = previousEpochEndVersion;
logSystemConfig.recoveredAt = recoveredAt;
for( int i = 0; i < tLogs.size(); i++ ) {
Reference<LogSet> logSet = tLogs[i];
if(logSet->isLocal || remoteLogsWrittenToCoreState) {
@ -1034,12 +1031,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
// Returns the stored optional version plus one; asserts that the optional has been set.
// NOTE(review): this text looks like a rendered diff hunk, so the ASSERT and the return each
// appear twice (an `epochEndVersion` spelling followed by a `recoverAt` spelling) — confirm
// against the real source, which presumably keeps only one of each pair.
virtual Version getEnd() {
ASSERT( epochEndVersion.present() );
return epochEndVersion.get() + 1;
ASSERT( recoverAt.present() );
return recoverAt.get() + 1;
}
Version getPeekEnd() {
if (epochEndVersion.present())
if (recoverAt.present())
return getEnd();
else
return std::numeric_limits<Version>::max();
@ -1169,7 +1166,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
// This is a brand new database
Reference<TagPartitionedLogSystem> logSystem( new TagPartitionedLogSystem(dbgid, locality) );
logSystem->logSystemType = prevState.logSystemType;
logSystem->epochEndVersion = 0;
logSystem->recoverAt = 0;
logSystem->knownCommittedVersion = 0;
logSystem->stopped = true;
outLogSystem->set(logSystem);
@ -1308,7 +1305,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
logSystem->logSystemType = prevState.logSystemType;
logSystem->rejoins = rejoins;
logSystem->lockResults = lockResults;
logSystem->epochEndVersion = minEnd;
logSystem->recoverAt = minEnd;
logSystem->knownCommittedVersion = knownCommittedVersion;
logSystem->remoteLogsWrittenToCoreState = true;
logSystem->stopped = true;
@ -1552,7 +1549,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
req.recruitmentID = self->recruitmentID;
req.storeType = configuration.tLogDataStoreType;
req.recoverFrom = oldLogSystem->getLogSystemConfig();
req.recoverAt = oldLogSystem->epochEndVersion.get();
req.recoverAt = oldLogSystem->recoverAt.get();
req.knownCommittedVersion = oldLogSystem->knownCommittedVersion;
req.epoch = recoveryCount;
req.remoteTag = Tag(tagLocalityRemoteLog, i);
@ -1598,12 +1595,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
state Reference<TagPartitionedLogSystem> logSystem( new TagPartitionedLogSystem(oldLogSystem->getDebugID(), oldLogSystem->locality) );
logSystem->logSystemType = 2;
logSystem->expectedLogSets = 1;
logSystem->previousEpochEndVersion = oldLogSystem->epochEndVersion;
logSystem->recoveredAt = oldLogSystem->recoverAt;
logSystem->recruitmentID = g_random->randomUniqueID();
oldLogSystem->recruitmentID = logSystem->recruitmentID;
if(configuration.usableRegions > 1) {
logSystem->logRouterTags = recr.tLogs.size();
logSystem->logRouterTags = recr.tLogs.size() * std::max<int>(1, configuration.desiredLogRouterCount / std::max<int>(1,recr.tLogs.size()));
logSystem->expectedLogSets++;
} else {
logSystem->logRouterTags = 0;
@ -1620,9 +1617,15 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
if(region.satelliteTLogReplicationFactor > 0) {
logSystem->tLogs.push_back( Reference<LogSet>( new LogSet() ) );
logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorum;
logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor;
logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy;
if(recr.satelliteFallback) {
logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorumFallback;
logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactorFallback;
logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicyFallback;
} else {
logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorum;
logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor;
logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy;
}
logSystem->tLogs[1]->isLocal = true;
logSystem->tLogs[1]->locality = tagLocalitySatellite;
logSystem->tLogs[1]->startVersion = oldLogSystem->knownCommittedVersion + 1;
@ -1710,7 +1713,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
req.recruitmentID = logSystem->recruitmentID;
req.storeType = configuration.tLogDataStoreType;
req.recoverFrom = oldLogSystem->getLogSystemConfig();
req.recoverAt = oldLogSystem->epochEndVersion.get();
req.recoverAt = oldLogSystem->recoverAt.get();
req.knownCommittedVersion = oldLogSystem->knownCommittedVersion;
req.epoch = recoveryCount;
req.locality = primaryLocality;
@ -1753,7 +1756,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
req.recruitmentID = logSystem->recruitmentID;
req.storeType = configuration.tLogDataStoreType;
req.recoverFrom = oldLogSystem->getLogSystemConfig();
req.recoverAt = oldLogSystem->epochEndVersion.get();
req.recoverAt = oldLogSystem->recoverAt.get();
req.knownCommittedVersion = oldLogSystem->knownCommittedVersion;
req.epoch = recoveryCount;
req.locality = tagLocalitySatellite;
@ -2045,11 +2048,11 @@ Future<Void> ILogSystem::recoverAndEndEpoch(Reference<AsyncVar<Reference<ILogSys
return TagPartitionedLogSystem::recoverAndEndEpoch( outLogSystem, dbgid, oldState, rejoins, locality );
}
// Builds a log system object from a LogSystemConfig, dispatching on conf.logSystemType:
//   0     -> returns an empty Reference<ILogSystem>
//   2     -> delegates to TagPartitionedLogSystem::fromLogSystemConfig with the same arguments
//   other -> throws internal_error()
// NOTE(review): this text looks like a rendered diff hunk, so the signature and the delegating
// return each appear twice (a `usePreviousEpochEnd` spelling followed by a `useRecoveredAt`
// spelling) — confirm against the real source, which presumably keeps only one of each pair.
Reference<ILogSystem> ILogSystem::fromLogSystemConfig( UID const& dbgid, struct LocalityData const& locality, struct LogSystemConfig const& conf, bool excludeRemote, bool usePreviousEpochEnd, Optional<PromiseStream<Future<Void>>> addActor ) {
Reference<ILogSystem> ILogSystem::fromLogSystemConfig( UID const& dbgid, struct LocalityData const& locality, struct LogSystemConfig const& conf, bool excludeRemote, bool useRecoveredAt, Optional<PromiseStream<Future<Void>>> addActor ) {
if (conf.logSystemType == 0)
return Reference<ILogSystem>();
else if (conf.logSystemType == 2)
return TagPartitionedLogSystem::fromLogSystemConfig( dbgid, locality, conf, excludeRemote, usePreviousEpochEnd, addActor );
return TagPartitionedLogSystem::fromLogSystemConfig( dbgid, locality, conf, excludeRemote, useRecoveredAt, addActor );
else
throw internal_error();
}
@ -2063,6 +2066,6 @@ Reference<ILogSystem> ILogSystem::fromOldLogSystemConfig( UID const& dbgid, stru
throw internal_error();
}
// Convenience wrapper: forwards to fromLogSystemConfig using dbInfo.myLocality and
// dbInfo.logSystemConfig, always passing false for the excludeRemote argument.
// NOTE(review): this text looks like a rendered diff hunk, so the signature and the return each
// appear twice (a `usePreviousEpochEnd` spelling followed by a `useRecoveredAt` spelling) —
// confirm against the real source, which presumably keeps only one of each pair.
Reference<ILogSystem> ILogSystem::fromServerDBInfo( UID const& dbgid, ServerDBInfo const& dbInfo, bool usePreviousEpochEnd, Optional<PromiseStream<Future<Void>>> addActor ) {
return fromLogSystemConfig( dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, usePreviousEpochEnd, addActor );
Reference<ILogSystem> ILogSystem::fromServerDBInfo( UID const& dbgid, ServerDBInfo const& dbInfo, bool useRecoveredAt, Optional<PromiseStream<Future<Void>>> addActor ) {
return fromLogSystemConfig( dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, useRecoveredAt, addActor );
}

View File

@ -227,7 +227,7 @@ struct SetMetricsLogRateRequest {
struct EventLogRequest {
bool getLastError;
Standalone<StringRef> eventName;
ReplyPromise< Standalone<StringRef> > reply;
ReplyPromise< TraceEventFields > reply;
EventLogRequest() : getLastError(true) {}
explicit EventLogRequest( Standalone<StringRef> eventName ) : eventName( eventName ), getLastError( false ) {}

View File

@ -311,7 +311,7 @@ ACTOR Future<Void> newTLogServers( Reference<MasterData> self, RecruitFromConfig
self->dcId_locality[remoteDcId] = loc;
}
Future<RecruitRemoteFromConfigurationReply> fRemoteWorkers = brokenPromiseToNever( self->clusterController.recruitRemoteFromConfiguration.getReply( RecruitRemoteFromConfigurationRequest( self->configuration, remoteDcId, recr.tLogs.size() ) ) );
Future<RecruitRemoteFromConfigurationReply> fRemoteWorkers = brokenPromiseToNever( self->clusterController.recruitRemoteFromConfiguration.getReply( RecruitRemoteFromConfigurationRequest( self->configuration, remoteDcId, recr.tLogs.size() * std::max<int>(1, self->configuration.desiredLogRouterCount / std::max<int>(1, recr.tLogs.size())) ) ) );
Reference<ILogSystem> newLogSystem = wait( oldLogSystem->newEpoch( recr, fRemoteWorkers, self->configuration, self->cstate.myDBState.recoveryCount + 1, self->dcId_locality[recr.dcId], self->dcId_locality[remoteDcId], self->allTags, self->recruitmentStalled ) );
self->logSystem = newLogSystem;

View File

@ -349,6 +349,8 @@ public:
NotifiedVersion oldestVersion; // See also storageVersion()
NotifiedVersion durableVersion; // At least this version will be readable from storage after a power failure
int64_t versionLag; // An estimate for how many versions it takes for the data to move from the logs to this storage server
uint64_t logProtocol;
Reference<ILogSystem> logSystem;
@ -366,6 +368,7 @@ public:
AsyncMap<Key,bool> watches;
int64_t watchBytes;
int64_t numWatches;
AsyncVar<bool> noRecentUpdates;
double lastUpdate;
@ -400,9 +403,10 @@ public:
struct Counters {
CounterCollection cc;
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried;
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried, watchQueries;
Counter bytesInput, bytesDurable, bytesFetched,
mutationBytes; // Like bytesInput but without MVCC accounting
Counter mutations, setMutations, clearRangeMutations, atomicMutations;
Counter updateBatches, updateVersions;
Counter loops;
@ -415,10 +419,15 @@ public:
finishedQueries("FinishedQueries", cc),
rowsQueried("RowsQueried", cc),
bytesQueried("BytesQueried", cc),
watchQueries("WatchQueries", cc),
bytesInput("BytesInput", cc),
bytesDurable("BytesDurable", cc),
bytesFetched("BytesFetched", cc),
mutationBytes("MutationBytes", cc),
mutations("Mutations", cc),
setMutations("SetMutations", cc),
clearRangeMutations("ClearRangeMutations", cc),
atomicMutations("AtomicMutations", cc),
updateBatches("UpdateBatches", cc),
updateVersions("UpdateVersions", cc),
loops("Loops", cc)
@ -428,6 +437,7 @@ public:
specialCounter(cc, "StorageVersion", [self](){ return self->storageVersion(); });
specialCounter(cc, "DurableVersion", [self](){ return self->durableVersion.get(); });
specialCounter(cc, "DesiredOldestVersion", [self](){ return self->desiredOldestVersion.get(); });
specialCounter(cc, "VersionLag", [self](){ return self->versionLag; });
specialCounter(cc, "FetchKeysFetchActive", [self](){ return self->fetchKeysParallelismLock.activePermits(); });
specialCounter(cc, "FetchKeysWaiting", [self](){ return self->fetchKeysParallelismLock.waiters(); });
@ -435,6 +445,8 @@ public:
specialCounter(cc, "QueryQueueMax", [self](){ return self->getAndResetMaxQueryQueueSize(); });
specialCounter(cc, "BytesStored", [self](){ return self->metrics.byteSample.getEstimate(allKeys); });
specialCounter(cc, "ActiveWatches", [self](){ return self->numWatches; });
specialCounter(cc, "WatchBytes", [self](){ return self->watchBytes; });
specialCounter(cc, "KvstoreBytesUsed", [self](){ return self->storage.getStorageBytes().used; });
specialCounter(cc, "KvstoreBytesFree", [self](){ return self->storage.getStorageBytes().free; });
@ -447,10 +459,11 @@ public:
: instanceID(g_random->randomUniqueID().first()),
storage(this, storage), db(db),
lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0),
versionLag(0),
updateEagerReads(0),
shardChangeCounter(0),
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES),
shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0),
shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0),
logProtocol(0), counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")),
behind(false), byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false),
@ -760,6 +773,8 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req ) {
try {
++data->counters.watchQueries;
if( req.debugID.present() )
g_traceBatch.addEvent("WatchValueDebug", req.debugID.get().first(), "watchValueQ.Before"); //.detail("TaskID", g_network->getCurrentTask());
@ -792,11 +807,14 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req )
return Void();
}
++data->numWatches;
data->watchBytes += ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
try {
Void _ = wait( watchFuture );
--data->numWatches;
data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
} catch( Error &e ) {
--data->numWatches;
data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
throw;
}
@ -1809,6 +1827,9 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
state int debug_nextRetryToLog = 1;
state bool isTooOld = false;
//FIXME: The client cache does not notice when servers are added to a team. To read from a local storage server we must refresh the cache manually.
data->cx->invalidateCache(keys);
loop {
try {
TEST(true); // Fetching keys for transferred shard
@ -1888,6 +1909,11 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
// Throw away deferred updates from before fetchVersion, since we don't need them to use blocks fetched at that version
while (!shard->updates.empty() && shard->updates[0].version <= fetchVersion) shard->updates.pop_front();
//FIXME: remove when we no longer support upgrades from 5.X
if(debug_getRangeRetries >= 100) {
data->cx->enableLocalityLoadBalance = false;
}
debug_getRangeRetries++;
if (debug_nextRetryToLog==debug_getRangeRetries){
debug_nextRetryToLog += std::min(debug_nextRetryToLog, 1024);
@ -1902,6 +1928,9 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
}
}
//FIXME: remove when we no longer support upgrades from 5.X
data->cx->enableLocalityLoadBalance = true;
// We have completed the fetch and write of the data, now we wait for MVCC window to pass.
// As we have finished this work, we will allow more work to start...
shard->fetchComplete.send(Void());
@ -2354,6 +2383,7 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
++data->counters.updateBatches;
data->lastTLogVersion = cursor->getMaxKnownVersion();
data->versionLag = std::max<int64_t>(0, data->lastTLogVersion - data->version.get());
ASSERT(*pReceivedUpdate == false);
*pReceivedUpdate = true;
@ -2477,6 +2507,28 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
updater.applyMutation(data, msg, ver);
data->counters.mutationBytes += msg.totalSize();
++data->counters.mutations;
switch(msg.type) {
case MutationRef::SetValue:
++data->counters.setMutations;
break;
case MutationRef::ClearRange:
++data->counters.clearRangeMutations;
break;
case MutationRef::AddValue:
case MutationRef::And:
case MutationRef::AndV2:
case MutationRef::AppendIfFits:
case MutationRef::ByteMax:
case MutationRef::ByteMin:
case MutationRef::Max:
case MutationRef::Min:
case MutationRef::MinV2:
case MutationRef::Or:
case MutationRef::Xor:
++data->counters.atomicMutations;
break;
}
}
else
TraceEvent(SevError, "DiscardingPeekedData", data->thisServerID).detail("Mutation", msg.toString()).detail("Version", cloneCursor2->version().toString());
@ -3140,8 +3192,8 @@ ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterfac
if( self->db->get().recoveryState >= RecoveryState::FULLY_RECOVERED ) {
self->logSystem = ILogSystem::fromServerDBInfo( self->thisServerID, self->db->get() );
if (self->logSystem) {
if(self->logSystem->getLogSystemConfig().oldTLogs.size()) {
self->poppedAllAfter = self->logSystem->getLogSystemConfig().oldTLogs[0].epochEnd;
if(self->db->get().logSystemConfig.recoveredAt.present()) {
self->poppedAllAfter = self->db->get().logSystemConfig.recoveredAt.get();
}
self->logCursor = self->logSystem->peekSingle( self->thisServerID, self->version.get() + 1, self->tag, self->history );
self->popVersion( self->durableVersion.get() + 1, true );

View File

@ -820,11 +820,11 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
}
}
when( EventLogRequest req = waitNext(interf.eventLogRequest.getFuture()) ) {
Standalone<StringRef> e;
TraceEventFields e;
if( req.getLastError )
e = StringRef( latestEventCache.getLatestError() );
e = latestEventCache.getLatestError();
else
e = StringRef( latestEventCache.get( req.eventName.toString() ) );
e = latestEventCache.get( req.eventName.toString() );
req.reply.send(e);
}
when( TraceBatchDumpRequest req = waitNext(interf.traceBatchDumpRequest.getFuture()) ) {

View File

@ -369,29 +369,22 @@ struct ConsistencyCheckWorkload : TestWorkload
ACTOR Future<bool> getKeyLocations(Database cx, vector<pair<KeyRange, vector<StorageServerInterface>>> shards, ConsistencyCheckWorkload *self, Promise<Standalone<VectorRef<KeyValueRef>>> keyLocationPromise)
{
state Standalone<VectorRef<KeyValueRef>> keyLocations;
state Key beginKey = allKeys.begin;
state Key beginKey = allKeys.begin.withPrefix(keyServersPrefix);
state Key endKey = allKeys.end.withPrefix(keyServersPrefix);
state int i = 0;
//If the responses are too big, we may use multiple requests to get the key locations. Each request begins where the last left off
for ( ; i < shards.size(); i++)
{
// skip serverList shards
if (!shards[i].first.begin.startsWith(keyServersPrefix)) {
break;
}
state Key endKey = shards[i].first.end.startsWith(keyServersPrefix) ? shards[i].first.end.removePrefix(keyServersPrefix) : allKeys.end;
while(beginKey < endKey)
while(beginKey < shards[i].first.end)
{
try
{
Version version = wait(self->getVersion(cx, self));
GetKeyValuesRequest req;
Key prefixBegin = beginKey.withPrefix(keyServersPrefix);
req.begin = firstGreaterOrEqual(prefixBegin);
req.end = firstGreaterOrEqual(keyServersEnd);
req.begin = firstGreaterOrEqual(beginKey);
req.end = firstGreaterOrEqual(std::min<KeyRef>(shards[i].first.end, endKey));
req.limit = SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT;
req.limitBytes = SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT_BYTES;
req.version = version;
@ -443,17 +436,26 @@ struct ConsistencyCheckWorkload : TestWorkload
}
auto keyValueResponse = keyValueFutures[firstValidStorageServer].get().get();
Standalone<RangeResultRef> currentLocations = krmDecodeRanges( keyServersPrefix, KeyRangeRef(beginKey, endKey), RangeResultRef( keyValueResponse.data, keyValueResponse.more) );
Standalone<RangeResultRef> currentLocations = krmDecodeRanges( keyServersPrefix, KeyRangeRef(beginKey.removePrefix(keyServersPrefix), std::min<KeyRef>(shards[i].first.end, endKey).removePrefix(keyServersPrefix)), RangeResultRef( keyValueResponse.data, keyValueResponse.more) );
//Push all but the last item, which will be pushed as the first item next iteration
keyLocations.append_deep(keyLocations.arena(), currentLocations.begin(), currentLocations.size() - 1);
if(keyValueResponse.data.size() && beginKey == keyValueResponse.data[0].key) {
keyLocations.push_back_deep(keyLocations.arena(), currentLocations[0]);
}
if(currentLocations.size() > 2) {
keyLocations.append_deep(keyLocations.arena(), &currentLocations[1], currentLocations.size() - 2);
}
//Next iteration should pick up where we left off
ASSERT(currentLocations.size() > 1);
beginKey = currentLocations.end()[-1].key;
if(!keyValueResponse.more) {
beginKey = shards[i].first.end;
} else {
beginKey = keyValueResponse.data.end()[-1].key;
}
//If this is the last iteration, then push the allKeys.end KV pair
if(beginKey == allKeys.end)
if(beginKey >= endKey)
keyLocations.push_back_deep(keyLocations.arena(), currentLocations.end()[-1]);
}
catch(Error &e)
@ -626,7 +628,7 @@ struct ConsistencyCheckWorkload : TestWorkload
}*/
//In a quiescent database, check that the team size is the same as the desired team size
if(self->firstClient && self->performQuiescentChecks && sourceStorageServers.size() != configuration.storageTeamSize)
if(self->firstClient && self->performQuiescentChecks && sourceStorageServers.size() != configuration.usableRegions*configuration.storageTeamSize)
{
TraceEvent("ConsistencyCheck_InvalidTeamSize").detail("ShardBegin", printable(range.begin)).detail("ShardEnd", printable(range.end)).detail("TeamSize", sourceStorageServers.size()).detail("DesiredTeamSize", configuration.storageTeamSize);
self->testFailure("Invalid team size");
@ -971,7 +973,7 @@ struct ConsistencyCheckWorkload : TestWorkload
//Min and max shard sizes have a 3 * shardBounds.permittedError.bytes cushion for error since shard sizes are not precise
//Shard splits ignore the first key in a shard, so its size shouldn't be considered when checking the upper bound
//0xff shards are not checked
if( canSplit && self->performQuiescentChecks && !range.begin.startsWith(keyServersPrefix) &&
if( canSplit && sampledKeys > 5 && self->performQuiescentChecks && !range.begin.startsWith(keyServersPrefix) &&
(sampledBytes < shardBounds.min.bytes - 3 * shardBounds.permittedError.bytes || sampledBytes - firstKeySampledBytes > shardBounds.max.bytes + 3 * shardBounds.permittedError.bytes))
{
TraceEvent("ConsistencyCheck_InvalidShardSize").detail("Min", shardBounds.min.bytes).detail("Max", shardBounds.max.bytes).detail("Size", shardBytes)

View File

@ -41,10 +41,10 @@ struct DDMetricsWorkload : TestWorkload {
WorkerInterface masterWorker = wait(getMasterWorker(cx, self->dbInfo));
TraceEvent("GetHighPriorityReliocationsInFlight").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster");
Standalone<StringRef> md = wait( timeoutError(masterWorker.eventLogRequest.getReply(
TraceEventFields md = wait( timeoutError(masterWorker.eventLogRequest.getReply(
EventLogRequest( StringRef( cx->dbName.toString() + "/MovingData" ) ) ), 1.0 ) );
int relocations;
sscanf(extractAttribute(md.toString(), "HighPriorityRelocations").c_str(), "%d", &relocations);
sscanf(md.getValue("HighPriorityRelocations").c_str(), "%d", &relocations);
return relocations;
}

View File

@ -42,17 +42,17 @@ struct WorkerErrorsWorkload : TestWorkload {
virtual void getMetrics( vector<PerfMetric>& m ) {}
ACTOR Future< std::vector< std::string > > latestEventOnWorkers( std::vector<std::pair<WorkerInterface, ProcessClass>> workers ) {
state vector<Future<Standalone<StringRef>>> eventTraces;
ACTOR Future< std::vector< TraceEventFields > > latestEventOnWorkers( std::vector<std::pair<WorkerInterface, ProcessClass>> workers ) {
state vector<Future<TraceEventFields>> eventTraces;
for(int c = 0; c < workers.size(); c++) {
eventTraces.push_back( workers[c].first.eventLogRequest.getReply( EventLogRequest() ) );
}
Void _ = wait( timeoutError( waitForAll( eventTraces ), 2.0 ) );
vector<std::string> results;
vector<TraceEventFields> results;
for(int i = 0; i < eventTraces.size(); i++) {
results.push_back( eventTraces[i].get().toString() );
results.push_back( eventTraces[i].get() );
}
return results;
@ -60,9 +60,9 @@ struct WorkerErrorsWorkload : TestWorkload {
ACTOR Future<Void> _start(Database cx, WorkerErrorsWorkload *self) {
state vector<std::pair<WorkerInterface, ProcessClass>> workers = wait( getWorkers( self->dbInfo ) );
std::vector<std::string> errors = wait( self->latestEventOnWorkers( workers ) );
std::vector<TraceEventFields> errors = wait( self->latestEventOnWorkers( workers ) );
for(auto e : errors) {
printf("%s\n", e.c_str());
printf("%s\n", e.toString().c_str());
}
return Void();
}

187
flow/FileTraceLogWriter.cpp Normal file
View File

@ -0,0 +1,187 @@
/*
* FileTraceLogWriter.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "FileTraceLogWriter.h"
#include "flow.h"
#include "ThreadHelper.actor.h"
#if defined(__unixish__)
#define __open ::open
#define __write ::write
#define __close ::close
#define __fsync ::fsync
#define TRACEFILE_FLAGS O_WRONLY | O_CREAT | O_EXCL
#define TRACEFILE_MODE 0664
#elif defined(_WIN32)
#include <windows.h>
#undef max
#undef min
#include <io.h>
#include <stdio.h>
#include <sys/stat.h>
#define __open _open
#define __write _write
#define __close _close
#define __fsync _commit
#define TRACEFILE_FLAGS _O_WRONLY | _O_CREAT | _O_EXCL
#define TRACEFILE_MODE _S_IWRITE
#endif
#include <fcntl.h>
// Stores the naming/configuration parameters. No file is opened here: the descriptor starts at -1
// and the file index at 0; open() is what actually creates a trace file.
FileTraceLogWriter::FileTraceLogWriter(std::string directory,
                                       std::string processName,
                                       std::string basename,
                                       std::string extension,
                                       uint64_t maxLogsSize,
                                       std::function<void()> onError)
	: directory(directory),
	  processName(processName),
	  basename(basename),
	  extension(extension),
	  maxLogsSize(maxLogsSize),
	  traceFileFD(-1),
	  index(0),
	  onError(onError)
{
}
// Forwards to the ReferenceCounted<FileTraceLogWriter> base so the writer can be
// reference-counted through the addref()/delref() pair it exposes.
void FileTraceLogWriter::addref() {
ReferenceCounted<FileTraceLogWriter>::addref();
}
// Forwards to the ReferenceCounted<FileTraceLogWriter> base; the counterpart of addref().
void FileTraceLogWriter::delref() {
ReferenceCounted<FileTraceLogWriter>::delref();
}
// Reports the outcome of the most recent file operation.
//   err - 0 on success, otherwise the errno value of the failed call.
// Success and EINTR are ignored; any other value invokes the onError callback.
void FileTraceLogWriter::lastError(int err) {
	const bool nothingToReport = (err == 0 || err == EINTR);
	if (nothingToReport) {
		return;
	}

	// On a serious write error, every flush barrier posted between the failing operation and the
	// detection of the error is released even though nothing was actually flushed. Without this a
	// permanent write error would leave the program blocked forever.
	onError();
}
// Writes all of `str` to the current trace file descriptor, looping over partial writes.
// A write that makes no progress is reported through lastError(errno) and retried after a
// 0.1 second sleep, so this only returns once every byte has been accepted.
void FileTraceLogWriter::write(const std::string& str) {
	const char* cursor = str.c_str();
	int bytesLeft = str.size();

	while ( bytesLeft != 0 ) {
		const int written = __write( traceFileFD, cursor, bytesLeft );
		if ( written <= 0 ) {
			// No progress: report the failure, back off briefly, and try again.
			lastError(errno);
			threadSleep(0.1);
			continue;
		}

		lastError(0);
		bytesLeft -= written;
		cursor += written;
	}
}
// Opens the next trace file, named "<basename>.<index>.<extension>".
// Old trace files are cleaned up first. The file is created exclusively: if the name already
// exists the index is advanced and the open is retried immediately; any other failure is printed
// to stderr, logged as a TraceFileOpenError event on the main thread, and retried after
// TRACE_RETRY_OPEN_INTERVAL. Returns only once a file has been opened.
void FileTraceLogWriter::open() {
	cleanupTraceFiles();

	auto finalname = format("%s.%d.%s", basename.c_str(), ++index, extension.c_str());
	while ( (traceFileFD = __open( finalname.c_str(), TRACEFILE_FLAGS, TRACEFILE_MODE )) == -1 ) {
		// Capture errno right away: lastError() runs the onError callback and fprintf() performs
		// I/O, and either may overwrite errno before it is inspected or reported below.
		const int errorNum = errno;
		lastError(errorNum);
		if (errorNum == EEXIST)
			finalname = format("%s.%d.%s", basename.c_str(), ++index, extension.c_str());
		else {
			fprintf(stderr, "ERROR: could not create trace log file `%s' (%d: %s)\n", finalname.c_str(), errorNum, strerror(errorNum));

			// The event is emitted from the main thread; copies of the name and error code are captured.
			onMainThreadVoid([finalname, errorNum]{
				TraceEvent(SevWarnAlways, "TraceFileOpenError")
					.detail("Filename", finalname)
					.detail("ErrorCode", errorNum)
					.detail("Error", strerror(errorNum))
					.trackLatest("TraceFileOpenError"); }, NULL);
			threadSleep(FLOW_KNOBS->TRACE_RETRY_OPEN_INTERVAL);
		}
	}

	// The open succeeded, so drop any previously tracked open error.
	onMainThreadVoid([]{ latestEventCache.clear("TraceFileOpenError"); }, NULL);
	lastError(0);
}
// Closes the current trace file if one is open, retrying every 0.1 seconds until the close call
// reports success. The descriptor is then reset to -1 so that a repeated close() is a no-op
// instead of retrying forever on an already-closed descriptor, and so that no later operation
// can touch a descriptor number the OS may have handed to another file.
void FileTraceLogWriter::close() {
	if (traceFileFD >= 0) {
		while ( __close(traceFileFD) ) threadSleep(0.1);
		traceFileFD = -1;
	}
}
// Switches to a fresh trace file: closes the current one, then open() creates the next file
// (open() advances the file index and also runs trace file cleanup).
void FileTraceLogWriter::roll() {
close();
open();
}
// Asks the OS to flush the current trace file to stable storage (fsync on unix-like platforms,
// _commit on Windows). The result of the call is not checked.
void FileTraceLogWriter::sync() {
__fsync(traceFileFD);
}
// Splits a trace file name of the form "<root>.<index>.<extension>" into its root and numeric index.
//   filename - name to parse
//   root     - receives everything before the ".<index>" component, or the whole filename when
//              the name does not contain both separators
//   index    - receives the parsed index, or -1 when there is no index component or it is not a number
// The split is made on the last two '.' characters, so it does not depend on the extension length.
void FileTraceLogWriter::extractTraceFileNameInfo(std::string const& filename, std::string &root, int &index) {
	// Defaults used whenever the name does not match the expected layout.
	root = filename;
	index = -1;

	const size_t extensionDot = filename.find_last_of('.');
	if (extensionDot == std::string::npos || extensionDot == 0)
		return;

	const size_t indexDot = filename.find_last_of('.', extensionDot - 1);
	if (indexDot == std::string::npos)
		return;

	root = filename.substr(0, indexDot);

	// sscanf returns 0 (not EOF) when the text is present but not numeric, so only an exact
	// single conversion counts as success; index is never left unset.
	const std::string indexText = filename.substr(indexDot + 1, extensionDot - indexDot - 1);
	int parsed = 0;
	if (sscanf(indexText.c_str(), "%d", &parsed) == 1)
		index = parsed;
}
// Ordering predicate for trace file names: compares by root, then by numeric index, and finally
// by the full name as a tie-breaker. Returns true when f1 sorts before f2.
bool FileTraceLogWriter::compareTraceFileName (std::string const& f1, std::string const& f2) {
	std::string root1;
	std::string root2;
	// Start from -1 so the comparison never reads an indeterminate value when a name cannot be
	// parsed; an indeterminate index would make the ordering inconsistent for std::sort.
	int index1 = -1;
	int index2 = -1;

	extractTraceFileNameInfo(f1, root1, index1);
	extractTraceFileNameInfo(f2, root2, index2);

	if(root1 != root2)
		return root1 < root2;
	if(index1 != index2)
		return index1 < index2;
	return f1 < f2;
}
// Inverse of compareTraceFileName: returns true when f2 sorts before f1, which yields a
// descending order when used as a sort predicate.
bool FileTraceLogWriter::reverseCompareTraceFileName(std::string f1, std::string f2) {
return compareTraceFileName(f2, f1);
}
// Deletes the oldest trace files of this process once the accumulated size of its trace files
// reaches maxLogsSize. Does nothing in simulation or when maxLogsSize is 0. Errors raised while
// listing, sizing or deleting files are deliberately ignored (best-effort cleanup).
void FileTraceLogWriter::cleanupTraceFiles() {
	// Setting maxLogsSize=0 disables trace file cleanup based on dir size
	if(g_network->isSimulated() || maxLogsSize == 0) {
		return;
	}

	try {
		// Only files with our extension whose name starts with this process's name are candidates.
		std::vector<std::string> candidates = platform::listFiles(directory, extension);
		std::vector<std::string> ownTraceFiles;
		for(const std::string& name : candidates) {
			if(name.compare(0, processName.length(), processName) == 0) {
				ownTraceFiles.push_back(name);
			}
		}

		// reverse sort, so we preserve the most recent files and delete the oldest
		std::sort(ownTraceFiles.begin(), ownTraceFiles.end(), FileTraceLogWriter::reverseCompareTraceFileName);

		// Walk from newest to oldest, keeping files until the size budget is reached...
		int64_t runningTotal = 0;
		std::vector<std::string>::iterator cursor = ownTraceFiles.begin();
		for(; runningTotal < maxLogsSize && cursor != ownTraceFiles.end(); ++cursor) {
			runningTotal += (fileSize(joinPath(directory, *cursor)) + FLOW_KNOBS->ZERO_LENGTH_FILE_PAD);
		}

		// ...and delete everything older than that point.
		for(; cursor != ownTraceFiles.end(); ++cursor) {
			deleteFile(joinPath(directory, *cursor));
		}
	} catch( Error & ) {}
}

65
flow/FileTraceLogWriter.h Normal file
View File

@ -0,0 +1,65 @@
/*
* FileTraceLogWriter.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLOW_FILE_TRACE_LOG_WRITER_H
#define FLOW_FILE_TRACE_LOG_WRITER_H
#pragma once
#include "FastRef.h"
#include "Trace.h"
#include <functional>
// ITraceLogWriter implementation that writes trace output to files named
// "<basename>.<index>.<extension>", advancing the index each time a new file is opened.
class FileTraceLogWriter : public ITraceLogWriter, ReferenceCounted<FileTraceLogWriter> {
private:
	std::string directory;    // directory scanned by cleanupTraceFiles()
	std::string processName;  // file-name prefix identifying this process's trace files during cleanup
	std::string basename;     // path prefix of the trace files created by open()
	std::string extension;    // file extension, without the leading '.'

	uint64_t maxLogsSize;     // size budget for cleanupTraceFiles(); 0 disables size-based cleanup
	int traceFileFD;          // descriptor of the currently open trace file; -1 before the first open()
	int index;                // index used in the name of the most recently opened file

	std::function<void()> onError; // invoked by lastError() for any error other than EINTR

public:
	FileTraceLogWriter(std::string directory, std::string processName, std::string basename, std::string extension, uint64_t maxLogsSize, std::function<void()> onError);

	// Reference counting, forwarded to ReferenceCounted<FileTraceLogWriter>.
	void addref() override;
	void delref() override;

	// Reports the errno of the last file operation (0 for success).
	void lastError(int err);

	// ITraceLogWriter interface. Marked override so that any signature drift from the interface
	// is a compile error rather than a silently separate function.
	void write(const std::string& str) override;
	void open() override;
	void close() override;
	void roll() override;
	void sync() override;

	// Helpers for ordering trace file names by root and numeric index.
	static void extractTraceFileNameInfo(std::string const& filename, std::string &root, int &index);
	static bool compareTraceFileName (std::string const& f1, std::string const& f2);
	static bool reverseCompareTraceFileName(std::string f1, std::string f2);

	// Removes this process's oldest trace files once maxLogsSize is reached.
	void cleanupTraceFiles();
};
#endif

View File

@ -115,10 +115,12 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( TRACE_RETRY_OPEN_INTERVAL, 1.00 );
init( MIN_TRACE_SEVERITY, isSimulated ? 0 : 10 ); // Related to the trace severity in Trace.h
init( MAX_TRACE_SUPPRESSIONS, 1e4 );
init( TRACE_FSYNC_ENABLED, 0 );
init( TRACE_EVENT_METRIC_UNITS_PER_SAMPLE, 500 );
init( TRACE_EVENT_THROTLLER_SAMPLE_EXPIRY, 1800.0 ); // 30 mins
init( TRACE_EVENT_THROTTLER_MSG_LIMIT, 20000 );
init( TRACE_SYNC_ENABLED, 0 );
init( TRACE_EVENT_METRIC_UNITS_PER_SAMPLE, 500 );
init( TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY, 1800.0 ); // 30 mins
init( TRACE_EVENT_THROTTLER_MSG_LIMIT, 20000 );
init( TRACE_EVENT_MAX_SIZE, 4000 );
init( TRACE_LOG_MAX_PREOPEN_BUFFER, 1000000 );
//TDMetrics
init( MAX_METRICS, 600 );

View File

@ -137,10 +137,12 @@ public:
double TRACE_RETRY_OPEN_INTERVAL;
int MIN_TRACE_SEVERITY;
int MAX_TRACE_SUPPRESSIONS;
int TRACE_FSYNC_ENABLED;
int TRACE_SYNC_ENABLED;
int TRACE_EVENT_METRIC_UNITS_PER_SAMPLE;
int TRACE_EVENT_THROTLLER_SAMPLE_EXPIRY;
int TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY;
int TRACE_EVENT_THROTTLER_MSG_LIMIT;
int TRACE_EVENT_MAX_SIZE;
int TRACE_LOG_MAX_PREOPEN_BUFFER;
//TDMetrics
int64_t MAX_METRIC_SIZE;

View File

@ -2317,6 +2317,7 @@ extern void flushTraceFileVoid();
extern "C" void flushAndExit(int exitCode) {
flushTraceFileVoid();
fflush(stdout);
closeTraceFile();
#ifdef _WIN32
// This function is documented as being asynchronous, but we suspect it might actually be synchronous in the
// case that it is passed a handle to the current process. If not, then there may be cases where we escalate

View File

@ -501,6 +501,16 @@ inline static void* aligned_alloc(size_t alignment, size_t size) { return memali
#elif defined(__APPLE__)
#include <cstdlib>
inline static void* aligned_alloc(size_t alignment, size_t size) {
// Linux's aligned_alloc() requires alignment to be a power of 2. While posix_memalign()
// also requires this, in addition it requires alignment to be a multiple of sizeof(void *).
// Rather than add this requirement to the platform::aligned_alloc() interface we will simply
// upgrade powers of 2 which are less than sizeof(void *) to be exactly sizeof(void *). Non
// powers of 2 of any size will fail as they would on other platforms. This change does not
// break the platform::aligned_alloc() contract as all addresses which are aligned to
// sizeof(void *) are also aligned to any power of 2 less than sizeof(void *).
if(alignment != 0 && alignment < sizeof(void *) && (alignment & (alignment - 1)) == 0) {
alignment = sizeof(void *);
}
void* ptr = nullptr;
posix_memalign(&ptr, alignment, size);
return ptr;

File diff suppressed because it is too large Load Diff

View File

@ -52,6 +52,55 @@ enum Severity {
SevMax=1000000
};
// Container of (key, value) string pairs making up the fields of a trace event.
// Only the declaration is visible here; member semantics noted below are taken from the
// signatures and should be confirmed against the implementation.
class TraceEventFields {
public:
// A single field: first is the key, second is the value.
typedef std::pair<std::string, std::string> Field;
typedef std::vector<Field> FieldContainer;
typedef FieldContainer::const_iterator FieldIterator;
TraceEventFields();
// Number of fields (this is the count written by the save() serializer).
size_t size() const;
// NOTE(review): presumably the accumulated byte size tracked in `bytes` -- confirm.
size_t sizeBytes() const;
// Read-only iteration over the fields.
FieldIterator begin() const;
FieldIterator end() const;
// Adds a field; the rvalue overload allows the strings to be moved in.
void addField(const std::string& key, const std::string& value);
void addField(std::string&& key, std::string&& value);
const Field &operator[] (int index) const;
// NOTE(review): presumably returns false when `key` is absent, otherwise fills outValue -- confirm.
bool tryGetValue(std::string key, std::string &outValue) const;
// NOTE(review): behavior when `key` is absent is not visible here -- confirm before relying on it.
std::string getValue(std::string key) const;
std::string toString() const;
void validateFormat() const;
private:
FieldContainer fields;
size_t bytes;
};
// Deserializes a TraceEventFields: reads a 32-bit field count followed by that many
// key/value string pairs, appending each pair to `value` in the order read.
template <class Archive>
inline void load( Archive& ar, TraceEventFields& value ) {
	uint32_t fieldCount;
	ar >> fieldCount;

	// The two strings are reused across iterations, as the archive overwrites them on each read.
	std::string key;
	std::string val;
	for(uint32_t remaining = fieldCount; remaining != 0; --remaining) {
		ar >> key >> val;
		value.addField(key, val);
	}
}
// Serializes a TraceEventFields: writes the field count as a 32-bit integer followed by each
// key/value pair in iteration order (the format read back by load()).
template <class Archive>
inline void save( Archive& ar, const TraceEventFields& value ) {
	ar << (uint32_t)value.size();
	// Iterate by const reference: iterating by value would copy both strings of every field.
	for(const auto& field : value) {
		ar << field.first << field.second;
	}
}
class TraceBatch {
public:
void addEvent( const char *name, uint64_t id, const char *location );
@ -61,30 +110,18 @@ public:
private:
struct EventInfo {
double time;
const char *name;
uint64_t id;
const char *location;
EventInfo(double time, const char *name, uint64_t id, const char *location) : time(time), name(name), id(id), location(location) {}
TraceEventFields fields;
EventInfo(double time, const char *name, uint64_t id, const char *location);
};
struct AttachInfo {
double time;
const char *name;
uint64_t id;
uint64_t to;
AttachInfo(double time, const char *name, uint64_t id, uint64_t to) : time(time), name(name), id(id), to(to) {}
TraceEventFields fields;
AttachInfo(double time, const char *name, uint64_t id, uint64_t to);
};
struct BuggifyInfo {
double time;
int activated;
int line;
std::string file;
BuggifyInfo(double time, int activated, int line, std::string file) : time(time), activated(activated), line(line), file(file) {}
TraceEventFields fields;
BuggifyInfo(double time, int activated, int line, std::string file);
};
std::vector<EventInfo> eventBatch;
@ -97,7 +134,6 @@ class StringRef;
template <class T> class Standalone;
template <class T> class Optional;
#if 1
struct TraceEvent {
TraceEvent( const char* type, UID id = UID() ); // Assumes SevInfo severity
TraceEvent( Severity, const char* type, UID id = UID() );
@ -112,29 +148,28 @@ struct TraceEvent {
static void setNetworkThread();
static bool isNetworkThread();
TraceEvent& error(class Error const& e, bool includeCancelled=false);
TraceEvent& error(const class Error& e, bool includeCancelled=false);
TraceEvent& detail( const char* key, const char* value );
TraceEvent& detail( const char* key, const std::string& value );
TraceEvent& detail( const char* key, double value );
TraceEvent& detail( const char* key, long int value );
TraceEvent& detail( const char* key, long unsigned int value );
TraceEvent& detail( const char* key, long long int value );
TraceEvent& detail( const char* key, long long unsigned int value );
TraceEvent& detail( const char* key, int value );
TraceEvent& detail( const char* key, unsigned value );
TraceEvent& detail( const char* key, struct NetworkAddress const& value );
TraceEvent& detailf( const char* key, const char* valueFormat, ... );
TraceEvent& detailext(const char* key, StringRef const& value);
TraceEvent& detailext(const char* key, Optional<Standalone<StringRef>> const& value);
TraceEvent& detail( std::string key, std::string value );
TraceEvent& detail( std::string key, double value );
TraceEvent& detail( std::string key, long int value );
TraceEvent& detail( std::string key, long unsigned int value );
TraceEvent& detail( std::string key, long long int value );
TraceEvent& detail( std::string key, long long unsigned int value );
TraceEvent& detail( std::string key, int value );
TraceEvent& detail( std::string key, unsigned value );
TraceEvent& detail( std::string key, const struct NetworkAddress& value );
TraceEvent& detailf( std::string key, const char* valueFormat, ... );
TraceEvent& detailext( std::string key, const StringRef& value );
TraceEvent& detailext( std::string key, const Optional<Standalone<StringRef>>& value );
private:
// Private version of _detailf that does NOT write to the eventMetric. This is to be used by other detail methods
// Private version of detailf that does NOT write to the eventMetric. This is to be used by other detail methods
// which can write field metrics of a more appropriate type than string but use detailf() to add to the TraceEvent.
TraceEvent& _detailf( const char* key, const char* valueFormat, ... );
TraceEvent& detailfNoMetric( std::string&& key, const char* valueFormat, ... );
TraceEvent& detailImpl( std::string&& key, std::string&& value, bool writeEventMetricField=true );
public:
TraceEvent& detailfv( const char* key, const char* valueFormat, va_list args, bool writeEventMetricField);
TraceEvent& detail( const char* key, UID const& value );
TraceEvent& backtrace(std::string prefix = "");
TraceEvent& detail( std::string key, const UID& value );
TraceEvent& backtrace(const std::string& prefix = "");
TraceEvent& trackLatest( const char* trackingKey );
TraceEvent& sample( double sampleRate, bool logSampleRate=true );
TraceEvent& suppressFor( double duration, bool logSuppressedEventCount=true );
@ -151,7 +186,7 @@ public:
private:
bool enabled;
std::string trackingKey;
char buffer[4000];
TraceEventFields fields;
int length;
Severity severity;
const char *type;
@ -162,45 +197,28 @@ private:
bool init( Severity, const char* type );
bool init( Severity, struct TraceInterval& );
void write( int length, const void* data );
void writef( const char* format, ... );
void writeEscaped( const char* data );
void writeEscapedfv( const char* format, va_list args );
};
#else
struct TraceEvent {
TraceEvent(const char* type, UID id = UID()) {}
TraceEvent(Severity, const char* type, UID id = UID()) {}
TraceEvent(struct TraceInterval&, UID id = UID()) {}
TraceEvent(const char* type, StringRef& const id); {} // Assumes SevInfo severity
TraceEvent(Severity, const char* type, StringRef& const id); {}
static bool isEnabled(const char* type) { return false; }
struct ITraceLogWriter {
virtual void open() = 0;
virtual void roll() = 0;
virtual void close() = 0;
virtual void write(const std::string&) = 0;
virtual void sync() = 0;
TraceEvent& error(class Error const& e, bool includeCancelled = false) { return *this; }
TraceEvent& detail(const char* key, const char* value) { return *this; }
TraceEvent& detail(const char* key, const std::string& value) { return *this; }
TraceEvent& detail(const char* key, double value) { return *this; }
TraceEvent& detail(const char* key, long int value) { return *this; }
TraceEvent& detail(const char* key, long unsigned int value) { return *this; }
TraceEvent& detail(const char* key, long long int value) { return *this; }
TraceEvent& detail(const char* key, long long unsigned int value) { return *this; }
TraceEvent& detail(const char* key, int value) { return *this; }
TraceEvent& detail(const char* key, unsigned value) { return *this; }
TraceEvent& detail(const char* key, struct NetworkAddress const& value) { return *this; }
TraceEvent& detailf(const char* key, const char* valueFormat, ...) { return *this; }
TraceEvent& detailfv(const char* key, const char* valueFormat, va_list args) { return *this; }
TraceEvent& detail(const char* key, UID const& value) { return *this; }
TraceEvent& detailext(const char* key, StringRef const& value) { return *this; }
TraceEvent& detailext(const char* key, Optional<Standalone<StringRef>> const& value); { return *this; }
TraceEvent& backtrace(std::string prefix = "") { return *this; }
TraceEvent& trackLatest(const char* trackingKey) { return *this; }
TraceEvent& GetLastError() { return *this; }
virtual void addref() = 0;
virtual void delref() = 0;
};
// Interface for converting trace events (TraceEventFields) into the text written to a trace file.
// Every member is pure virtual; a concrete formatter supplies the file extension, the per-file
// header/footer text and the per-event formatting.
struct ITraceLogFormatter {
// NOTE(review): presumably the file-name extension used for files in this format - confirm against the writer.
virtual const char* getExtension() = 0;
virtual const char* getHeader() = 0; // Called when starting a new file
virtual const char* getFooter() = 0; // Called when ending a file
virtual std::string formatEvent(const TraceEventFields&) = 0; // Called for each event
// Reference-counting hooks; how the count is stored and when the object is destroyed is left to the implementation.
virtual void addref() = 0;
virtual void delref() = 0;
};
#endif
struct TraceInterval {
TraceInterval( const char* type ) : count(-1), type(type), severity(SevInfo) {}
@ -216,20 +234,20 @@ struct TraceInterval {
struct LatestEventCache {
public:
void set( std::string tag, std::string contents );
std::string get( std::string tag );
std::vector<std::string> getAll();
std::vector<std::string> getAllUnsafe();
void set( std::string tag, const TraceEventFields& fields );
TraceEventFields get( std::string tag );
std::vector<TraceEventFields> getAll();
std::vector<TraceEventFields> getAllUnsafe();
void clear( std::string prefix );
void clear();
// Latest error tracking only tracks errors when called from the main thread. Other errors are silently ignored.
void setLatestError( std::string contents );
std::string getLatestError();
void setLatestError( const TraceEventFields& contents );
TraceEventFields getLatestError();
private:
std::map<NetworkAddress, std::map<std::string, std::string>> latest;
std::map<NetworkAddress, std::string> latestErrors;
std::map<NetworkAddress, std::map<std::string, TraceEventFields>> latest;
std::map<NetworkAddress, TraceEventFields> latestErrors;
};
extern LatestEventCache latestEventCache;

View File

@ -0,0 +1,95 @@
/*
* XmlTraceLogFormatter.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "actorcompiler.h"
#include "XmlTraceLogFormatter.h"
void XmlTraceLogFormatter::addref() {
ReferenceCounted<XmlTraceLogFormatter>::addref();
}
void XmlTraceLogFormatter::delref() {
ReferenceCounted<XmlTraceLogFormatter>::delref();
}
// Returns the extension string for this format ("xml", without a leading dot).
const char* XmlTraceLogFormatter::getExtension() {
	const char* const extension = "xml";
	return extension;
}
// Returns the text that opens an XML trace document: the XML declaration followed by the
// opening <Trace> element, each terminated with CRLF.
const char* XmlTraceLogFormatter::getHeader() {
	const char* const header = "<?xml version=\"1.0\"?>\r\n<Trace>\r\n";
	return header;
}
// Returns the text that closes an XML trace document: the closing </Trace> element plus CRLF.
const char* XmlTraceLogFormatter::getFooter() {
	const char* const footer = "</Trace>\r\n";
	return footer;
}
// Appends `source` to `ss`, replacing every character that may not appear verbatim inside a
// double-quoted XML attribute:
//   '&' -> "&amp;"   '"' -> "&quot;"   '<' -> "&lt;"   '>' -> "&gt;"
//   '\r' and '\n' -> a single space
//   '\0' -> a single space, and a SevWarnAlways "StrippedIllegalCharacterFromTraceEvent" event is logged
// All other characters are copied through unchanged.
//
// The string is scanned once with a moving cursor, so the cost is linear in source.size() even when
// many characters need escaping.
void XmlTraceLogFormatter::escape(std::stringstream &ss, std::string source) {
	// Kept as a char array with an explicit length (not a C string) because the set contains '\0'.
	static const char specialChars[] = { '&', '"', '<', '>', '\r', '\n', '\0' };

	size_t start = 0; // index of the first character that has not been written yet
	loop {
		const size_t index = source.find_first_of(specialChars, start, sizeof(specialChars));
		if(index == std::string::npos) {
			break;
		}

		// Copy the untouched run preceding the special character.
		ss << source.substr(start, index - start);

		switch(source[index]) {
			case '&':
				ss << "&amp;";
				break;
			case '"':
				ss << "&quot;";
				break;
			case '<':
				ss << "&lt;";
				break;
			case '>':
				ss << "&gt;";
				break;
			case '\n':
			case '\r':
				ss << " ";
				break;
			case '\0':
				ss << " ";
				// "Source" is the portion of the input following the previously replaced character.
				TraceEvent(SevWarnAlways, "StrippedIllegalCharacterFromTraceEvent").detail("Source", StringRef(source.substr(start)).printable()).detail("Character", StringRef(source.substr(index, 1)).printable());
				break;
			default:
				// find_first_of can only return a position holding one of specialChars.
				ASSERT(false);
		}

		start = index + 1;
	}

	// Whatever follows the last special character (or the whole string if there was none).
	ss << source.substr(start);
}
// Renders one trace event as a self-closing XML element terminated by CRLF:
//   <Event key1="value1" key2="value2" />
// Each field's key (.first) and value (.second) are passed through escape() before being written.
std::string XmlTraceLogFormatter::formatEvent(const TraceEventFields &fields) {
	std::stringstream ss;
	ss << "<Event ";

	// Bind by const reference so each key/value pair is not copied just to be iterated.
	for(const auto &field : fields) {
		escape(ss, field.first);
		ss << "=\"";
		escape(ss, field.second);
		ss << "\" ";
	}

	ss << "/>\r\n";
	return ss.str();
}

View File

@ -0,0 +1,43 @@
/*
* XmlTraceLogFormatter.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLOW_XML_TRACE_LOG_FORMATTER_H
#define FLOW_XML_TRACE_LOG_FORMATTER_H
#pragma once
#include <sstream>
#include "FastRef.h"
#include "Trace.h"
// ITraceLogFormatter implementation that renders trace events as XML.
// Reference counting is provided by the ReferenceCounted base; addref()/delref() are declared here
// to implement the pure virtuals of ITraceLogFormatter.
struct XmlTraceLogFormatter : public ITraceLogFormatter, ReferenceCounted<XmlTraceLogFormatter> {
	// Reference-counting hooks required by ITraceLogFormatter.
	void addref() override;
	void delref() override;

	// Extension string, file header and file footer for this format.
	const char* getExtension() override;
	const char* getHeader() override;
	const char* getFooter() override;

	// Appends `source` to `ss` with characters that are unsafe in an XML attribute value replaced.
	// Not part of the ITraceLogFormatter interface.
	void escape(std::stringstream &ss, std::string source);

	// Returns the XML text for a single event.
	std::string formatEvent(const TraceEventFields &fields) override;
};
#endif

View File

@ -874,13 +874,13 @@ namespace actorcompiler
case "\r\n": LineCount++; break;
case "\n": LineCount++; break;
}
if (tokens[i].Value.StartsWith("/*")) LineCount += tokens[i].Value.Count(c=>c=='\n');
if (BraceDepth < 0) throw new Error(LineCount, "Mismatched braces");
if (ParenDepth < 0) throw new Error(LineCount, "Mismatched parenthesis");
tokens[i].Position = i;
tokens[i].SourceLine = LineCount;
tokens[i].BraceDepth = BraceDepth;
tokens[i].ParenDepth = ParenDepth;
if (tokens[i].Value.StartsWith("/*")) LineCount += tokens[i].Value.Count(c=>c=='\n');
switch (tokens[i].Value)
{
case "{": BraceDepth++; if (BraceDepth==1) lastBrace = tokens[i]; break;

View File

@ -118,7 +118,7 @@ ERROR( read_version_already_set, 2010, "Transaction already has a read version s
ERROR( version_invalid, 2011, "Version not valid" )
ERROR( range_limits_invalid, 2012, "Range limits not valid" )
ERROR( invalid_database_name, 2013, "Database name must be 'DB'" )
ERROR( attribute_not_found, 2014, "Attribute not found in string" )
ERROR( attribute_not_found, 2014, "Attribute not found" )
ERROR( future_not_set, 2015, "Future not ready" )
ERROR( future_not_error, 2016, "Future not an error" )
ERROR( used_during_commit, 2017, "Operation issued while a commit was outstanding" )

View File

@ -93,38 +93,52 @@ Optional<uint64_t> parse_with_suffix(std::string toparse, std::string default_un
return ret;
}
std::string format( const char* form, ... ) {
int vsformat( std::string &outputString, const char* form, va_list args) {
char buf[200];
va_list args;
va_start(args, form);
int size = vsnprintf(buf, sizeof(buf), form, args);
va_end(args);
va_list args2;
va_copy(args2, args);
int size = vsnprintf(buf, sizeof(buf), form, args2);
va_end(args2);
if(size >= 0 && size < sizeof(buf)) {
return std::string(buf, size);
outputString = std::string(buf, size);
return size;
}
#ifdef _WIN32
// Microsoft's non-standard vsnprintf doesn't return a correct size, but just an error, so determine the necessary size
va_start(args, form);
size = _vscprintf(form, args);
va_end(args);
va_copy(args2, args);
size = _vscprintf(form, args2);
va_end(args2);
#endif
if (size < 0) throw internal_error();
if (size < 0) {
return -1;
}
TEST(true); //large format result
std::string s;
s.resize(size + 1);
va_start(args, form);
size = vsnprintf(&s[0], s.size(), form, args);
va_end(args);
if (size < 0 || size >= s.size()) throw internal_error();
outputString.resize(size + 1);
size = vsnprintf(&outputString[0], outputString.size(), form, args);
if (size < 0 || size >= outputString.size()) {
return -1;
}
s.resize(size);
return s;
outputString.resize(size);
return size;
}
// printf-style formatting that returns the result as a std::string.
// Collects the variadic arguments and hands them to vsformat(); a negative result from vsformat()
// trips the ASSERT.
std::string format( const char* form, ... ) {
	std::string formatted;

	va_list ap;
	va_start(ap, form);
	const int written = vsformat(formatted, form, ap);
	va_end(ap);

	ASSERT(written >= 0);
	return formatted;
}
Standalone<StringRef> strinc(StringRef const& str) {

View File

@ -68,6 +68,10 @@ bool validationIsEnabled();
extern Optional<uint64_t> parse_with_suffix(std::string toparse, std::string default_unit = "");
extern std::string format(const char* form, ...);
// On success, returns the number of characters written. On failure, returns a negative number.
extern int vsformat(std::string &outputString, const char* form, va_list args);
extern Standalone<StringRef> strinc(StringRef const& str);
extern StringRef strinc(StringRef const& str, Arena& arena);
extern Standalone<StringRef> addVersionStampAtEnd(StringRef const& str);

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|X64">
@ -18,6 +18,10 @@
<ClCompile Include="Error.cpp" />
<ClCompile Include="FastAlloc.cpp" />
<ClCompile Include="FaultInjection.cpp" />
<ClCompile Include="FileTraceLogWriter.cpp" />
<ClCompile Include="XmlTraceLogFormatter.cpp" />
<ClInclude Include="FileTraceLogWriter.h" />
<ClInclude Include="XmlTraceLogFormatter.h" />
<ClInclude Include="MetricSample.h" />
<ClInclude Include="Profiler.h" />
<ActorCompiler Include="Profiler.actor.cpp" />

View File

@ -36,6 +36,8 @@
<ClCompile Include="version.cpp" />
<ClCompile Include="stacktrace.amalgamation.cpp" />
<ClCompile Include="SignalSafeUnwind.cpp" />
<ClCompile Include="XmlTraceLogFormatter.cpp" />
<ClCompile Include="FileTraceLogWriter.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="ActorCollection.h" />
@ -72,6 +74,8 @@
<ClInclude Include="SignalSafeUnwind.h" />
<ClInclude Include="MetricSample.h" />
<ClInclude Include="stacktrace.h" />
<ClInclude Include="XmlTraceLogFormatter.h" />
<ClInclude Include="FileTraceLogWriter.h" />
</ItemGroup>
<ItemGroup>
<None Include="no_intellisense.opt" />

View File

@ -132,6 +132,18 @@ Future<T> transformErrors( Future<T> f, Error err ) {
}
}
// Waits for `f` and returns its value.
// If `f` fails with an error whose code() equals inErr.code(), outErr is thrown in its place;
// any other error is thrown again unchanged.
ACTOR template <class T>
Future<T> transformError( Future<T> f, Error inErr, Error outErr ) {
try {
T t = wait( f );
return t;
} catch( Error &e ) {
// Only the error code is compared when deciding whether to substitute outErr.
if( e.code() == inErr.code() )
throw outErr;
throw e;
}
}
// Note that the RequestStream<T> version of forwardPromise doesn't exist, because what to do with errors?
ACTOR template <class T>

View File

@ -44,21 +44,20 @@ public class MicroQueue {
// Remove the top element from the queue.
public static Object dequeue(TransactionContext tcx){
final KeyValue item = firstItem(tcx);
if(item == null){
return null;
}
// Remove from the top of the queue.
tcx.run(new Function<Transaction,Void>(){
return tcx.run(new Function<Transaction,Void>(){
public Void apply(Transaction tr){
final KeyValue item = firstItem(tr);
if(item == null){
return null;
}
tr.clear(item.getKey());
return null;
// Return the old value.
return Tuple.fromBytes(item.getValue()).get(0);
}
});
// Return the old value.
return Tuple.fromBytes(item.getValue()).get(0);
}
// Add an element to the queue.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long