diff --git a/FDBLibTLS/FDBLibTLSVerify.cpp b/FDBLibTLS/FDBLibTLSVerify.cpp index b951ab5810..0c28a2f036 100644 --- a/FDBLibTLS/FDBLibTLSVerify.cpp +++ b/FDBLibTLS/FDBLibTLSVerify.cpp @@ -136,7 +136,7 @@ static std::pair splitPair(std::string const& input, c static int abbrevToNID(std::string const& sn) { int nid = NID_undef; - if (sn == "C" || sn == "CN" || sn == "L" || sn == "ST" || sn == "O" || sn == "OU") + if (sn == "C" || sn == "CN" || sn == "L" || sn == "ST" || sn == "O" || sn == "OU" || sn == "UID" || sn == "DC") nid = OBJ_sn2nid(sn.c_str()); if (nid == NID_undef) throw std::runtime_error("abbrevToNID"); diff --git a/Makefile b/Makefile index 828a455f2b..715215eb1b 100644 --- a/Makefile +++ b/Makefile @@ -154,7 +154,7 @@ clean: $(CLEAN_TARGETS) docpreview_clean @rm -rf $(DEPSDIR) @rm -rf lib/ @rm -rf bin/coverage.*.xml - @rm -f */*.g.cpp */*/*/g.cpp */*.g.h */*/*.g.h + @find . -name "*.g.cpp" -exec rm -f {} \; -or -name "*.g.h" -exec rm -f {} \; targets: @echo "Available targets:" diff --git a/bindings/bindingtester/tests/directory.py b/bindings/bindingtester/tests/directory.py index f6ad9a108b..e675c03cb6 100644 --- a/bindings/bindingtester/tests/directory.py +++ b/bindings/bindingtester/tests/directory.py @@ -28,7 +28,7 @@ from bindingtester import util from bindingtester.tests import Test, Instruction, InstructionSet, ResultSpecification from bindingtester.tests import test_util, directory_util -from bindingtester.tests.directory_util import DirListEntry +from bindingtester.tests.directory_state_tree import DirectoryStateTreeNode fdb.api_version(FDB_API_VERSION) @@ -48,12 +48,12 @@ class DirectoryTest(Test): def ensure_default_directory_subspace(self, instructions, path): directory_util.create_default_directory_subspace(instructions, path, self.random) - child = self.root.add_child((path,), path, self.root, DirListEntry(True, True)) + child = self.root.add_child(path, DirectoryStateTreeNode(True, True, has_known_prefix=True)) self.dir_list.append(child) self.dir_index = directory_util.DEFAULT_DIRECTORY_INDEX def generate_layer(self): - if random.random < 0.7: + if random.random() < 0.7: return '' else: choice = random.randint(0, 3) @@ -114,29 +114,34 @@ class DirectoryTest(Test): instructions.push_args(layer) instructions.push_args(*test_util.with_length(path)) instructions.append('DIRECTORY_OPEN') - # print '%d. Selected %s, dir=%s, has_known_prefix=%s, dir_list_len=%d' \ - # % (len(instructions), 'DIRECTORY_OPEN', repr(self.dir_index), False, len(self.dir_list)) - self.dir_list.append(self.dir_list[0].add_child(path, default_path, self.root, DirListEntry(True, True, has_known_prefix=False))) + self.dir_list.append(self.root.add_child(path, DirectoryStateTreeNode(True, True, has_known_prefix=False))) + # print('%d. 
Selected %s, dir=%s, dir_id=%s, has_known_prefix=%s, dir_list_len=%d' \ + # % (len(instructions), 'DIRECTORY_OPEN', repr(self.dir_index), self.dir_list[-1].dir_id, False, len(self.dir_list)-1)) instructions.setup_complete() for i in range(args.num_ops): if random.random() < 0.5: - self.dir_index = random.randrange(0, len(self.dir_list)) + while True: + self.dir_index = random.randrange(0, len(self.dir_list)) + if not self.dir_list[self.dir_index].state.is_partition or not self.dir_list[self.dir_index].state.deleted: + break + instructions.push_args(self.dir_index) instructions.append('DIRECTORY_CHANGE') + dir_entry = self.dir_list[self.dir_index] + choices = op_choices[:] - if self.dir_list[self.dir_index].is_directory: + if dir_entry.state.is_directory: choices += directory - if self.dir_list[self.dir_index].is_subspace: + if dir_entry.state.is_subspace: choices += subspace op = random.choice(choices) - dir_entry = self.dir_list[self.dir_index] - # print '%d. Selected %s, dir=%s, has_known_prefix=%s, dir_list_len=%d' \ - # % (len(instructions), op, repr(self.dir_index), repr(dir_entry.has_known_prefix), len(self.dir_list)) + # print('%d. Selected %s, dir=%d, dir_id=%d, has_known_prefix=%d, dir_list_len=%d' \ + # % (len(instructions), op, self.dir_index, dir_entry.dir_id, dir_entry.state.has_known_prefix, len(self.dir_list))) if op.endswith('_DATABASE') or op.endswith('_SNAPSHOT'): root_op = op[0:-9] @@ -151,24 +156,26 @@ class DirectoryTest(Test): elif root_op == 'DIRECTORY_CREATE_SUBSPACE': path = generate_path() - instructions.push_args(generate_prefix(allow_empty=False, is_partition=True)) + instructions.push_args(generate_prefix(require_unique=False, is_partition=True)) instructions.push_args(*test_util.with_length(path)) instructions.append(op) - self.dir_list.append(DirListEntry(False, True)) + self.dir_list.append(DirectoryStateTreeNode(False, True, has_known_prefix=True)) elif root_op == 'DIRECTORY_CREATE_LAYER': indices = [] + + prefixes = [generate_prefix(require_unique=args.concurrency==1, is_partition=True) for i in range(2)] for i in range(2): - instructions.push_args(generate_prefix(allow_empty=False, is_partition=True)) + instructions.push_args(prefixes[i]) instructions.push_args(*test_util.with_length(generate_path())) instructions.append('DIRECTORY_CREATE_SUBSPACE') indices.append(len(self.dir_list)) - self.dir_list.append(DirListEntry(False, True)) + self.dir_list.append(DirectoryStateTreeNode(False, True, has_known_prefix=True)) instructions.push_args(random.choice([0, 1])) instructions.push_args(*indices) instructions.append(op) - self.dir_list.append(DirListEntry(True, False, False)) + self.dir_list.append(DirectoryStateTreeNode.get_layer(prefixes[0])) elif root_op == 'DIRECTORY_CREATE_OR_OPEN': # Because allocated prefixes are non-deterministic, we cannot have overlapping @@ -183,14 +190,18 @@ class DirectoryTest(Test): if not op.endswith('_DATABASE') and args.concurrency == 1: test_util.blocking_commit(instructions) - self.dir_list.append(dir_entry.add_child(path, default_path, self.root, DirListEntry(True, True, False))) + child_entry = dir_entry.get_descendent(path) + if child_entry is None: + child_entry = DirectoryStateTreeNode(True, True) + + child_entry.state.has_known_prefix = False + self.dir_list.append(dir_entry.add_child(path, child_entry)) elif root_op == 'DIRECTORY_CREATE': layer = self.generate_layer() is_partition = layer == 'partition' - allow_empty_prefix = random.random() < 0.8 - prefix = generate_prefix(allow_empty=allow_empty_prefix, 
is_partition=is_partition) + prefix = generate_prefix(require_unique=is_partition and args.concurrency==1, is_partition=is_partition, min_length=0) # Because allocated prefixes are non-deterministic, we cannot have overlapping # transactions that allocate/remove these prefixes in a comparison test @@ -209,40 +220,59 @@ class DirectoryTest(Test): if not op.endswith('_DATABASE') and args.concurrency == 1: # and allow_empty_prefix: test_util.blocking_commit(instructions) - self.dir_list.append(dir_entry.add_child(path, default_path, self.root, DirListEntry(True, True, bool(prefix)))) + child_entry = dir_entry.get_descendent(path) + if child_entry is None: + child_entry = DirectoryStateTreeNode(True, True, has_known_prefix=bool(prefix)) + elif not bool(prefix): + child_entry.state.has_known_prefix = False + + if is_partition: + child_entry.state.is_partition = True + + self.dir_list.append(dir_entry.add_child(path, child_entry)) elif root_op == 'DIRECTORY_OPEN': path = generate_path() instructions.push_args(self.generate_layer()) instructions.push_args(*test_util.with_length(path)) instructions.append(op) - self.dir_list.append(dir_entry.add_child(path, default_path, self.root, DirListEntry(True, True))) + + child_entry = dir_entry.get_descendent(path) + if child_entry is None: + self.dir_list.append(DirectoryStateTreeNode(False, False, has_known_prefix=False)) + else: + self.dir_list.append(dir_entry.add_child(path, child_entry)) elif root_op == 'DIRECTORY_MOVE': old_path = generate_path() new_path = generate_path() instructions.push_args(*(test_util.with_length(old_path) + test_util.with_length(new_path))) instructions.append(op) - # This could probably be made to sometimes set has_known_prefix to true - self.dir_list.append(dir_entry.add_child(new_path, default_path, self.root, DirListEntry(True, True, False))) + + child_entry = dir_entry.get_descendent(old_path) + if child_entry is None: + self.dir_list.append(DirectoryStateTreeNode(False, False, has_known_prefix=False)) + else: + self.dir_list.append(dir_entry.add_child(new_path, child_entry)) # Make sure that the default directory subspace still exists after moving the specified directory - if dir_entry.is_directory and not dir_entry.is_subspace and old_path == (u'',): + if dir_entry.state.is_directory and not dir_entry.state.is_subspace and old_path == (u'',): self.ensure_default_directory_subspace(instructions, default_path) elif root_op == 'DIRECTORY_MOVE_TO': new_path = generate_path() instructions.push_args(*test_util.with_length(new_path)) instructions.append(op) - self.dir_list.append(dir_entry.root.add_child(new_path, default_path, self.root, - DirListEntry(True, True, dir_entry.has_known_prefix))) + + child_entry = dir_entry.get_descendent(()) + if child_entry is None: + self.dir_list.append(DirectoryStateTreeNode(False, False, has_known_prefix=False)) + else: + self.dir_list.append(dir_entry.add_child(new_path, child_entry)) # Make sure that the default directory subspace still exists after moving the current directory self.ensure_default_directory_subspace(instructions, default_path) - # FIXME: There is currently a problem with removing partitions. In these generated tests, it's possible - # for a removed partition to resurrect itself and insert keys into the database using its allocated - # prefix. The result is non-deterministic HCA errors. 
elif root_op == 'DIRECTORY_REMOVE' or root_op == 'DIRECTORY_REMOVE_IF_EXISTS': # Because allocated prefixes are non-deterministic, we cannot have overlapping # transactions that allocate/remove these prefixes in a comparison test @@ -254,12 +284,14 @@ class DirectoryTest(Test): if count == 1: path = generate_path() instructions.push_args(*test_util.with_length(path)) - instructions.push_args(count) + instructions.push_args(count) instructions.append(op) + dir_entry.delete(path) + # Make sure that the default directory subspace still exists after removing the specified directory - if path == () or (dir_entry.is_directory and not dir_entry.is_subspace and path == (u'',)): + if path == () or (dir_entry.state.is_directory and not dir_entry.state.is_subspace and path == (u'',)): self.ensure_default_directory_subspace(instructions, default_path) elif root_op == 'DIRECTORY_LIST' or root_op == 'DIRECTORY_EXISTS': @@ -278,7 +310,7 @@ class DirectoryTest(Test): instructions.append('DIRECTORY_STRIP_PREFIX') elif root_op == 'DIRECTORY_UNPACK_KEY' or root_op == 'DIRECTORY_CONTAINS': - if not dir_entry.has_known_prefix or random.random() < 0.2 or root_op == 'DIRECTORY_UNPACK_KEY': + if not dir_entry.state.has_known_prefix or random.random() < 0.2 or root_op == 'DIRECTORY_UNPACK_KEY': t = self.random.random_tuple(5) instructions.push_args(*test_util.with_length(t)) instructions.append('DIRECTORY_PACK_KEY') @@ -292,7 +324,7 @@ class DirectoryTest(Test): instructions.push_args(*test_util.with_length(t)) instructions.append(op) if root_op == 'DIRECTORY_OPEN_SUBSPACE': - self.dir_list.append(DirListEntry(False, True, dir_entry.has_known_prefix)) + self.dir_list.append(DirectoryStateTreeNode(False, True, dir_entry.state.has_known_prefix)) else: test_util.to_front(instructions, 1) instructions.append('DIRECTORY_STRIP_PREFIX') @@ -308,16 +340,18 @@ class DirectoryTest(Test): for i, dir_entry in enumerate(self.dir_list): instructions.push_args(i) instructions.append('DIRECTORY_CHANGE') - if dir_entry.is_directory: + if dir_entry.state.is_directory: instructions.push_args(self.directory_log.key()) instructions.append('DIRECTORY_LOG_DIRECTORY') - if dir_entry.has_known_prefix and dir_entry.is_subspace: - # print '%d. Logging subspace: %d' % (i, dir_entry.dir_id) + if dir_entry.state.has_known_prefix and dir_entry.state.is_subspace: + # print('%d. 
Logging subspace: %d' % (i, dir_entry.dir_id)) instructions.push_args(self.subspace_log.key()) instructions.append('DIRECTORY_LOG_SUBSPACE') if (i + 1) % 100 == 0: test_util.blocking_commit(instructions) + test_util.blocking_commit(instructions) + instructions.push_args(self.stack_subspace.key()) instructions.append('LOG_STACK') @@ -365,11 +399,15 @@ def generate_path(min_length=0): return path -def generate_prefix(allow_empty=True, is_partition=False): - if allow_empty and random.random() < 0.8: +def generate_prefix(require_unique=False, is_partition=False, min_length=1): + fixed_prefix = 'abcdefg' + if not require_unique and min_length == 0 and random.random() < 0.8: return None - elif is_partition or random.random() < 0.5: - length = random.randint(0 if allow_empty else 1, 5) + elif require_unique or is_partition or min_length > len(fixed_prefix) or random.random() < 0.5: + if require_unique: + min_length = max(min_length, 16) + + length = random.randint(min_length, min_length+5) if length == 0: return '' @@ -379,6 +417,6 @@ def generate_prefix(allow_empty=True, is_partition=False): else: return ''.join(chr(random.randrange(ord('\x02'), ord('\x14'))) for i in range(0, length)) else: - prefix = 'abcdefg' - generated = prefix[0:random.randrange(0 if allow_empty else 1, len(prefix))] + prefix = fixed_prefix + generated = prefix[0:random.randrange(min_length, len(prefix))] return generated diff --git a/bindings/bindingtester/tests/directory_state_tree.py b/bindings/bindingtester/tests/directory_state_tree.py new file mode 100644 index 0000000000..1191049c03 --- /dev/null +++ b/bindings/bindingtester/tests/directory_state_tree.py @@ -0,0 +1,259 @@ +import sys + +class TreeNodeState: + def __init__(self, node, dir_id, is_directory, is_subspace, has_known_prefix, root, is_partition): + self.dir_id = dir_id + self.is_directory = is_directory + self.is_subspace = is_subspace + self.has_known_prefix = has_known_prefix + self.root = root + self.is_partition = is_partition + + self.parents = { node } + self.children = {} + self.deleted = False + +# Represents an element of the directory hierarchy. As a result of various operations (e.g. moves) that +# may or may not have succeeded, a node can represent multiple possible states. +class DirectoryStateTreeNode: + # A cache of directory layers. We mustn't have multiple entries for the same layer + layers = {} + + # Because our operations may be applied to the default directory in the case that + # the current directory failed to open/create, we compute the result of each operation + # as if it was performed on the current directory and the default directory. 
+ default_directory = None + + # Used for debugging + dir_id = 0 + + @classmethod + def reset(cls): + cls.dir_id = 0 + cls.layers = {} + cls.default_directory = None + + @classmethod + def set_default_directory(cls, default_directory): + cls.default_directory = default_directory + + @classmethod + def get_layer(cls, node_subspace_prefix): + if node_subspace_prefix not in DirectoryStateTreeNode.layers: + DirectoryStateTreeNode.layers[node_subspace_prefix] = DirectoryStateTreeNode(True, False, has_known_prefix=False) + + return DirectoryStateTreeNode.layers[node_subspace_prefix] + + def __init__(self, is_directory, is_subspace, has_known_prefix=True, root=None, is_partition=False): + self.state = TreeNodeState(self, DirectoryStateTreeNode.dir_id + 1, is_directory, is_subspace, has_known_prefix, + root or self, is_partition) + DirectoryStateTreeNode.dir_id += 1 + + def __repr__(self): + return '{DirEntry %d: %d}' % (self.state.dir_id, self.state.has_known_prefix) + + def _get_descendent(self, subpath, default): + if not subpath: + if default is not None: + self._merge(default) + return self + + default_child = None + if default is not None: + default_child = default.state.children.get(subpath[0]) + + self_child = self.state.children.get(subpath[0]) + + if self_child is None: + if default_child is None: + return None + else: + return default_child._get_descendent(subpath[1:], None) + + return self_child._get_descendent(subpath[1:], default_child) + + def get_descendent(self, subpath): + return self._get_descendent(subpath, DirectoryStateTreeNode.default_directory) + + def add_child(self, subpath, child): + child.state.root = self.state.root + if DirectoryStateTreeNode.default_directory: + # print('Adding child %r to default directory at %r' % (child, subpath)) + child = DirectoryStateTreeNode.default_directory._add_child_impl(subpath, child) + # print('Added %r' % child) + + # print('Adding child %r to directory at %r' % (child, subpath)) + c = self._add_child_impl(subpath, child) + + # print('Added %r' % c) + return c + + def _add_child_impl(self, subpath, child): + # print('%d, %d. Adding child %r (recursive): %r' % (self.state.dir_id, child.state.dir_id, child, subpath)) + if len(subpath) == 0: + # print('%d, %d. Setting child: %d, %d' % (self.state.dir_id, child.state.dir_id, self.state.has_known_prefix, child.state.has_known_prefix)) + self._merge(child) + return self + else: + if not subpath[0] in self.state.children: + # print('%d, %d. Path %r was absent from %r (%r)' % (self.state.dir_id, child.state.dir_id, subpath[0:1], self, self.state.children)) + subdir = DirectoryStateTreeNode(True, True, root=self.state.root) + self.state.children[subpath[0]] = subdir + else: + subdir = self.state.children[subpath[0]] + # print('%d, %d. 
Path was present' % (self.state.dir_id, child.state.dir_id)) + + if len(subpath) > 1: + subdir.state.has_known_prefix = False + + return subdir._add_child_impl(subpath[1:], child) + + def _merge(self, other): + if self.state.dir_id == other.state.dir_id: + return + + self.dir_id = other.dir_id + self.state.dir_id = min(other.state.dir_id, self.state.dir_id) + self.state.is_directory = self.state.is_directory and other.state.is_directory + self.state.is_subspace = self.state.is_subspace and other.state.is_subspace + self.state.has_known_prefix = self.state.has_known_prefix and other.state.has_known_prefix + self.state.deleted = self.state.deleted or other.state.deleted + self.state.is_partition = self.state.is_partition or other.state.is_partition + + other_children = other.state.children.copy() + other_parents = other.state.parents.copy() + + for node in other_parents: + node.state = self.state + self.state.parents.add(node) + + for c in other_children: + if c not in self.state.children: + self.state.children[c] = other_children[c] + else: + self.state.children[c]._merge(other_children[c]) + + def _delete_impl(self): + if not self.state.deleted: + self.state.deleted = True + for c in self.state.children.values(): + c._delete_impl() + + def delete(self, path): + child = self.get_descendent(path) + if child: + child._delete_impl() + +def validate_dir(dir, root): + if dir.state.is_directory: + assert dir.state.root == root + else: + assert dir.state.root == dir + +def run_test(): + all_entries = [] + + root = DirectoryStateTreeNode.get_layer('\xfe') + all_entries.append(root) + + default_dir = root.add_child(('default',), DirectoryStateTreeNode(True, True, has_known_prefix=True)) + DirectoryStateTreeNode.set_default_directory(default_dir) + all_entries.append(default_dir) + + all_entries.append(default_dir.add_child(('1',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + all_entries.append(default_dir.add_child(('1', '1'), DirectoryStateTreeNode(True, False, has_known_prefix=True))) + all_entries.append(default_dir.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + all_entries.append(default_dir.add_child(('3',), DirectoryStateTreeNode(True, True, has_known_prefix=False))) + all_entries.append(default_dir.add_child(('5',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + all_entries.append(default_dir.add_child(('3', '1'), DirectoryStateTreeNode(True, True, has_known_prefix=False))) + all_entries.append(default_dir.add_child(('1', '3'), DirectoryStateTreeNode(True, True, has_known_prefix=False))) + + entry = all_entries[-1] + child_entries = [] + child_entries.append(entry.add_child(('1',), DirectoryStateTreeNode(True, False, has_known_prefix=True))) + child_entries.append(entry.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + child_entries.append(entry.add_child(('3',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + child_entries.append(entry.add_child(('4',), DirectoryStateTreeNode(True, False, has_known_prefix=False))) + child_entries.append(entry.add_child(('5',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + + all_entries.append(root.add_child(('1', '2'), DirectoryStateTreeNode(True, True, has_known_prefix=False))) + all_entries.append(root.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + all_entries.append(root.add_child(('3',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + all_entries.append(root.add_child(('1', '3',), 
DirectoryStateTreeNode(True, True, has_known_prefix=True))) + + # This directory was merged with the default, but both have readable prefixes + entry = root.get_descendent(('2',)) + assert entry.state.has_known_prefix + + entry = all_entries[-1] + all_entries.append(entry.add_child(('1',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + all_entries.append(entry.add_child(('2',), DirectoryStateTreeNode(True, True, has_known_prefix=False))) + all_entries.append(entry.add_child(('3',), DirectoryStateTreeNode(True, False, has_known_prefix=True))) + + entry_to_move = all_entries[-1] + + all_entries.append(entry.add_child(('5',), DirectoryStateTreeNode(True, False, has_known_prefix=True))) + child_entries.append(entry.add_child(('6',), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + + all_entries.extend(child_entries) + + # This directory has an unknown prefix + entry = root.get_descendent(('1', '2')) + assert not entry.state.has_known_prefix + + # This directory was default created and should have an unknown prefix + # It will merge with the default directory's child, which is not a subspace + entry = root.get_descendent(('1',)) + assert not entry.state.has_known_prefix + assert not entry.state.is_subspace + + # Multiple merges will have made this prefix unreadable + entry = root.get_descendent(('2',)) + assert not entry.state.has_known_prefix + + # Merge with default directory's child that has an unknown prefix + entry = root.get_descendent(('3',)) + assert not entry.state.has_known_prefix + + # Merge with default directory's child that has an unknown prefix and merged children + entry = root.get_descendent(('1', '3')) + assert set(entry.state.children.keys()) == {'1', '2', '3', '4', '5', '6'} + + # This child entry should be the combination of ['default', '3'], ['default', '1', '3'], and ['1', '3'] + entry = entry.get_descendent(('3',)) + assert not entry.state.has_known_prefix + assert not entry.state.is_subspace + + # Verify the merge of the children + assert not child_entries[0].state.has_known_prefix + assert not child_entries[0].state.is_subspace + + assert not child_entries[1].state.has_known_prefix + assert child_entries[1].state.is_subspace + + assert not child_entries[2].state.has_known_prefix + assert not child_entries[2].state.is_subspace + + assert not child_entries[3].state.has_known_prefix + assert not child_entries[3].state.is_subspace + + assert child_entries[4].state.has_known_prefix + assert not child_entries[4].state.is_subspace + + assert child_entries[5].state.has_known_prefix + assert child_entries[5].state.is_subspace + + entry = root.add_child(('3',), entry_to_move) + all_entries.append(entry) + + # Test moving an entry + assert not entry.state.has_known_prefix + assert not entry.state.is_subspace + assert entry.state.children.keys() == ['1'] + + for e in all_entries: + validate_dir(e, root) + +if __name__ == '__main__': + sys.exit(run_test()) + diff --git a/bindings/bindingtester/tests/directory_util.py b/bindings/bindingtester/tests/directory_util.py index 3a95ed848e..041d5369d9 100644 --- a/bindings/bindingtester/tests/directory_util.py +++ b/bindings/bindingtester/tests/directory_util.py @@ -27,6 +27,7 @@ from bindingtester import FDB_API_VERSION from bindingtester import util from bindingtester.tests import test_util +from bindingtester.tests.directory_state_tree import DirectoryStateTreeNode fdb.api_version(FDB_API_VERSION) @@ -34,82 +35,26 @@ DEFAULT_DIRECTORY_INDEX = 4 DEFAULT_DIRECTORY_PREFIX = 'default' DIRECTORY_ERROR_STRING = 
'DIRECTORY_ERROR' - -class DirListEntry: - dir_id = 0 # Used for debugging - - def __init__(self, is_directory, is_subspace, has_known_prefix=True, path=(), root=None): - self.root = root or self - self.path = path - self.is_directory = is_directory - self.is_subspace = is_subspace - self.has_known_prefix = has_known_prefix - self.children = {} - - self.dir_id = DirListEntry.dir_id + 1 - DirListEntry.dir_id += 1 - - def __repr__(self): - return 'DirEntry %d %r: %d' % (self.dir_id, self.path, self.has_known_prefix) - - def add_child(self, subpath, default_path, root, child): - if default_path in root.children: - # print 'Adding child %r to default directory %r at %r' % (child, root.children[DirectoryTest.DEFAULT_DIRECTORY_PATH].path, subpath) - c = root.children[default_path]._add_child_impl(subpath, child) - child.has_known_prefix = c.has_known_prefix and child.has_known_prefix - # print 'Added %r' % c - - # print 'Adding child %r to directory %r at %r' % (child, self.path, subpath) - c = self._add_child_impl(subpath, child) - # print 'Added %r' % c - return c - - def _add_child_impl(self, subpath, child): - # print '%d, %d. Adding child (recursive): %s %s' % (self.dir_id, child.dir_id, repr(self.path), repr(subpath)) - if len(subpath) == 0: - self.has_known_prefix = self.has_known_prefix and child.has_known_prefix - # print '%d, %d. Setting child: %d' % (self.dir_id, child.dir_id, self.has_known_prefix) - self._merge_children(child) - - return self - else: - if not subpath[0] in self.children: - # print '%d, %d. Path %s was absent (%s)' % (self.dir_id, child.dir_id, repr(self.path + subpath[0:1]), repr(self.children)) - subdir = DirListEntry(True, True, path=self.path + subpath[0:1], root=self.root) - subdir.has_known_prefix = len(subpath) == 1 - self.children[subpath[0]] = subdir - else: - subdir = self.children[subpath[0]] - subdir.has_known_prefix = False - # print '%d, %d. 
Path was present' % (self.dir_id, child.dir_id) - - return subdir._add_child_impl(subpath[1:], child) - - def _merge_children(self, other): - for c in other.children: - if c not in self.children: - self.children[c] = other.children[c] - else: - self.children[c].has_known_prefix = self.children[c].has_known_prefix and other.children[c].has_known_prefix - self.children[c]._merge_children(other.children[c]) - - def setup_directories(instructions, default_path, random): - dir_list = [DirListEntry(True, False, True)] + # Clients start with the default directory layer in the directory list + DirectoryStateTreeNode.reset() + dir_list = [DirectoryStateTreeNode.get_layer('\xfe')] + instructions.push_args(0, '\xfe') instructions.append('DIRECTORY_CREATE_SUBSPACE') - dir_list.append(DirListEntry(False, True)) + dir_list.append(DirectoryStateTreeNode(False, True)) instructions.push_args(0, '') instructions.append('DIRECTORY_CREATE_SUBSPACE') - dir_list.append(DirListEntry(False, True)) + dir_list.append(DirectoryStateTreeNode(False, True)) instructions.push_args(1, 2, 1) instructions.append('DIRECTORY_CREATE_LAYER') - dir_list.append(DirListEntry(True, False, True)) + dir_list.append(DirectoryStateTreeNode.get_layer('\xfe')) create_default_directory_subspace(instructions, default_path, random) - dir_list.append(DirListEntry(True, True, True)) + dir_list.append(dir_list[0].add_child((default_path,), DirectoryStateTreeNode(True, True, has_known_prefix=True))) + DirectoryStateTreeNode.set_default_directory(dir_list[-1]) instructions.push_args(DEFAULT_DIRECTORY_INDEX) instructions.append('DIRECTORY_SET_ERROR_INDEX') diff --git a/bindings/java/src/test/com/apple/foundationdb/test/AsyncDirectoryExtension.java b/bindings/java/src/test/com/apple/foundationdb/test/AsyncDirectoryExtension.java index 4fcf4f9f57..9c2d56a9d1 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/AsyncDirectoryExtension.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/AsyncDirectoryExtension.java @@ -170,7 +170,10 @@ class AsyncDirectoryExtension { .thenAccept(children -> inst.push(Tuple.fromItems(children).pack())); } else if(op == DirectoryOperation.DIRECTORY_EXISTS) { - return inst.popParam() + // In Java, DirectoryLayer.exists can return true without doing any reads. + // Other bindings will always do a read, so we get a read version now to be compatible with that behavior. + return inst.readTcx.readAsync(tr -> tr.getReadVersion()) + .thenComposeAsync(v -> inst.popParam()) .thenComposeAsync(count -> DirectoryUtil.popPaths(inst, StackUtils.getInt(count))) .thenComposeAsync(path -> { if(path.size() == 0) diff --git a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java index a81df36a73..a244b83cd8 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java @@ -317,8 +317,8 @@ public class AsyncStackTester { if(t != null) { inst.context.newTransaction(oldTr); // Other bindings allow reuse of non-retryable transactions, so we need to emulate that behavior. 
} - else { - inst.setTransaction(oldTr, tr); + else if(!inst.setTransaction(oldTr, tr)) { + tr.close(); } }).thenApply(v -> null); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/Context.java b/bindings/java/src/test/com/apple/foundationdb/test/Context.java index a446aa2b10..c71cb45f99 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/Context.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/Context.java @@ -92,6 +92,7 @@ abstract class Context implements Runnable, AutoCloseable { private static synchronized Transaction getTransaction(String trName) { Transaction tr = transactionMap.get(trName); + assert tr != null : "Null transaction"; addTransactionReference(tr); return tr; } @@ -117,7 +118,15 @@ abstract class Context implements Runnable, AutoCloseable { } private static synchronized boolean updateTransaction(String trName, Transaction oldTr, Transaction newTr) { - if(transactionMap.replace(trName, oldTr, newTr)) { + boolean added; + if(oldTr == null) { + added = (transactionMap.putIfAbsent(trName, newTr) == null); + } + else { + added = transactionMap.replace(trName, oldTr, newTr); + } + + if(added) { addTransactionReference(newTr); releaseTransaction(oldTr); return true; diff --git a/bindings/java/src/test/com/apple/foundationdb/test/DirectoryExtension.java b/bindings/java/src/test/com/apple/foundationdb/test/DirectoryExtension.java index 8c2b3265b1..fbf9168669 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/DirectoryExtension.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/DirectoryExtension.java @@ -160,6 +160,11 @@ class DirectoryExtension { int count = StackUtils.getInt(inst.popParam().get()); List> path = DirectoryUtil.popPaths(inst, count).get(); boolean exists; + + // In Java, DirectoryLayer.exists can return true without doing any reads. + // Other bindings will always do a read, so we get a read version now to be compatible with that behavior. + inst.readTcx.read(tr -> tr.getReadVersion().join()); + if(path.size() == 0) exists = directory().exists(inst.readTcx).get(); else diff --git a/bindings/java/src/test/com/apple/foundationdb/test/Instruction.java b/bindings/java/src/test/com/apple/foundationdb/test/Instruction.java index 86fe713554..d991217a0a 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/Instruction.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/Instruction.java @@ -72,16 +72,21 @@ class Instruction extends Stack { readTcx = isDatabase ? context.db : readTr; } - void setTransaction(Transaction newTr) { + boolean setTransaction(Transaction newTr) { if(!isDatabase) { context.updateCurrentTransaction(newTr); + return true; } + + return false; } - void setTransaction(Transaction oldTr, Transaction newTr) { + boolean setTransaction(Transaction oldTr, Transaction newTr) { if(!isDatabase) { - context.updateCurrentTransaction(oldTr, newTr); + return context.updateCurrentTransaction(oldTr, newTr); } + + return false; } void releaseTransaction() { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java b/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java index 2367c1fdc0..96281dec72 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java @@ -284,7 +284,10 @@ public class StackTester { FDBException err = new FDBException("Fake testing error", filteredError ? 
1020 : errorCode); try { - inst.setTransaction(inst.tr.onError(err).join()); + Transaction tr = inst.tr.onError(err).join(); + if(!inst.setTransaction(tr)) { + tr.close(); + } } catch(Throwable t) { inst.context.newTransaction(); // Other bindings allow reuse of non-retryable transactions, so we need to emulate that behavior. diff --git a/bindings/python/tests/directory_extension.py b/bindings/python/tests/directory_extension.py index c4c39846a5..66b97d6cf9 100644 --- a/bindings/python/tests/directory_extension.py +++ b/bindings/python/tests/directory_extension.py @@ -102,71 +102,71 @@ class DirectoryExtension(): new_dir = self.dir_list[self.dir_index] clazz = new_dir.__class__.__name__ new_path = repr(new_dir._path) if hasattr(new_dir, '_path') else "" - print('changed directory to %d (%s @%s)' % (self.dir_index, clazz, new_path)) + print('changed directory to %d (%s @%r)' % (self.dir_index, clazz, new_path)) elif inst.op == six.u('DIRECTORY_SET_ERROR_INDEX'): self.error_index = inst.pop() elif inst.op == six.u('DIRECTORY_CREATE_OR_OPEN'): path = self.pop_tuples(inst.stack) layer = inst.pop() - log_op('create_or_open %s: layer=%s' % (repr(directory.get_path() + path), repr(layer))) + log_op('create_or_open %r: layer=%r' % (directory.get_path() + path, layer)) d = directory.create_or_open(inst.tr, path, layer or b'') self.append_dir(inst, d) elif inst.op == six.u('DIRECTORY_CREATE'): path = self.pop_tuples(inst.stack) layer, prefix = inst.pop(2) - log_op('create %s: layer=%s, prefix=%s' % (repr(directory.get_path() + path), repr(layer), repr(prefix))) + log_op('create %r: layer=%r, prefix=%r' % (directory.get_path() + path, layer, prefix)) self.append_dir(inst, directory.create(inst.tr, path, layer or b'', prefix)) elif inst.op == six.u('DIRECTORY_OPEN'): path = self.pop_tuples(inst.stack) layer = inst.pop() - log_op('open %s: layer=%s' % (repr(directory.get_path() + path), repr(layer))) + log_op('open %r: layer=%r' % (directory.get_path() + path, layer)) self.append_dir(inst, directory.open(inst.tr, path, layer or b'')) elif inst.op == six.u('DIRECTORY_MOVE'): old_path, new_path = self.pop_tuples(inst.stack, 2) - log_op('move %s to %s' % (repr(directory.get_path() + old_path), repr(directory.get_path() + new_path))) + log_op('move %r to %r' % (directory.get_path() + old_path, directory.get_path() + new_path)) self.append_dir(inst, directory.move(inst.tr, old_path, new_path)) elif inst.op == six.u('DIRECTORY_MOVE_TO'): new_absolute_path = self.pop_tuples(inst.stack) - log_op('move %s to %s' % (repr(directory.get_path()), repr(new_absolute_path))) + log_op('move %r to %r' % (directory.get_path(), new_absolute_path)) self.append_dir(inst, directory.move_to(inst.tr, new_absolute_path)) elif inst.op == six.u('DIRECTORY_REMOVE'): count = inst.pop() if count == 0: - log_op('remove %s' % repr(directory.get_path())) + log_op('remove %r' % (directory.get_path(),)) directory.remove(inst.tr) else: path = self.pop_tuples(inst.stack) - log_op('remove %s' % repr(directory.get_path() + path)) + log_op('remove %r' % (directory.get_path() + path,)) directory.remove(inst.tr, path) elif inst.op == six.u('DIRECTORY_REMOVE_IF_EXISTS'): count = inst.pop() if count == 0: - log_op('remove_if_exists %s' % repr(directory.get_path())) + log_op('remove_if_exists %r' % (directory.get_path(),)) directory.remove_if_exists(inst.tr) else: path = self.pop_tuples(inst.stack) - log_op('remove_if_exists %s' % repr(directory.get_path() + path)) + log_op('remove_if_exists %r' % (directory.get_path() + path,)) 
directory.remove_if_exists(inst.tr, path) elif inst.op == six.u('DIRECTORY_LIST'): count = inst.pop() if count == 0: result = directory.list(inst.tr) - log_op('list %s' % (repr(directory.get_path()))) + log_op('list %r' % (directory.get_path(),)) else: path = self.pop_tuples(inst.stack) result = directory.list(inst.tr, path) - log_op('list %s' % (repr(directory.get_path() + path))) + log_op('list %r' % (directory.get_path() + path,)) inst.push(fdb.tuple.pack(tuple(result))) elif inst.op == six.u('DIRECTORY_EXISTS'): count = inst.pop() if count == 0: result = directory.exists(inst.tr) - log_op('exists %s: %d' % (repr(directory.get_path()), result)) + log_op('exists %r: %d' % (directory.get_path(), result)) else: path = self.pop_tuples(inst.stack) result = directory.exists(inst.tr, path) - log_op('exists %s: %d' % (repr(directory.get_path() + path), result)) + log_op('exists %r: %d' % (directory.get_path() + path, result)) if result: inst.push(1) @@ -177,7 +177,7 @@ class DirectoryExtension(): inst.push(directory.pack(key_tuple)) elif inst.op == six.u('DIRECTORY_UNPACK_KEY'): key = inst.pop() - log_op('unpack %s in subspace with prefix %s' % (repr(key), repr(directory.rawPrefix))) + log_op('unpack %r in subspace with prefix %r' % (key, directory.rawPrefix)) tup = directory.unpack(key) for t in tup: inst.push(t) @@ -215,7 +215,7 @@ class DirectoryExtension(): elif inst.op == six.u('DIRECTORY_STRIP_PREFIX'): s = inst.pop() if not s.startswith(directory.key()): - raise Exception('String %s does not start with raw prefix %s' % (s, directory.key())) + raise Exception('String %r does not start with raw prefix %r' % (s, directory.key())) inst.push(s[len(directory.key()):]) else: diff --git a/bindings/python/tests/tester.py b/bindings/python/tests/tester.py index 57002d5bd6..6aefe7ebd7 100644 --- a/bindings/python/tests/tester.py +++ b/bindings/python/tests/tester.py @@ -91,7 +91,7 @@ class Stack: else: raw[i] = (raw[i][0], val) except fdb.FDBError as e: - # print('ERROR: %s' % repr(e)) + # print('ERROR: %r' % e) raw[i] = (raw[i][0], fdb.tuple.pack((b'ERROR', str(e.code).encode('ascii')))) if count is None: @@ -543,7 +543,7 @@ class Tester: else: raise Exception("Unknown op %s" % inst.op) except fdb.FDBError as e: - # print('ERROR: %s' % repr(e)) + # print('ERROR: %r' % e) inst.stack.push(idx, fdb.tuple.pack((b"ERROR", str(e.code).encode('ascii')))) # print(" to %s" % self.stack) diff --git a/documentation/StatusSchema.json b/documentation/StatusSchema.json index 9ebbd6bd7c..bb5433d401 100644 --- a/documentation/StatusSchema.json +++ b/documentation/StatusSchema.json @@ -62,7 +62,10 @@ ] }, "data_version":12341234, - "data_version_lag":12341234, + "data_lag": { + "seconds":5.0, + "versions":12341234 + }, "id":"eb84471d68c12d1d26f692a50000003f", "finished_queries":{ "hz":0.0, @@ -362,6 +365,7 @@ "remote_redundancy_mode":"remote_single", "remote_log_replicas":3, "remote_logs":5, + "log_routers":10, "usable_regions":1, "storage_replicas":1, "resolvers":1, diff --git a/documentation/sphinx/conf.py b/documentation/sphinx/conf.py index c8f9b459d7..3afe72e7a0 100644 --- a/documentation/sphinx/conf.py +++ b/documentation/sphinx/conf.py @@ -31,7 +31,8 @@ extensions = [ 'sphinx.ext.todo', 'sphinx.ext.ifconfig', 'brokenrole', - 'relativelink' + 'relativelink', + 'sphinxcontrib.rubydomain' ] # Add any paths that contain templates here, relative to this directory. 
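
A note on the log-formatting changes in bindings/python/tests/directory_extension.py above: besides switching from '%s' % repr(x) to '%r' % x, the patch wraps single tuple arguments in a one-element tuple (e.g. log_op('remove %r' % (directory.get_path(),))). The following minimal sketch is illustration only (the path value is hypothetical, not taken from the patch) and shows why that wrapping is required when the formatted value is itself a tuple.

    # Illustration only: why the one-element tuple wrapping matters.
    path = ('app', 'users')          # a directory path tuple, as get_path() would return

    print('remove %r' % (path,))     # prints: remove ('app', 'users')

    # Without the wrapping, the tuple is consumed as the whole argument list,
    # so a single %r with a two-element tuple fails:
    try:
        print('remove %r' % path)
    except TypeError as e:
        print('formatting failed: %s' % e)  # not all arguments converted during string formatting
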
diff --git a/documentation/sphinx/requirements.txt b/documentation/sphinx/requirements.txt index 0412c26be0..8cf8ae7f95 100644 --- a/documentation/sphinx/requirements.txt +++ b/documentation/sphinx/requirements.txt @@ -2,3 +2,4 @@ sphinx==1.5.6 sphinx-bootstrap-theme==0.4.8 pygments-style-solarized +sphinxcontrib-rubydomain==0.1dev-20100804 \ No newline at end of file diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 0e3fb7de17..35ff083e0e 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -229,46 +229,45 @@ Use the ``status`` command of ``fdbcli`` to determine if the cluster is up and r The database is available. Welcome to the fdbcli. For help, type `help'. -fdb> status + fdb> status -Configuration: - Redundancy mode - triple - Storage engine - ssd-2 - Coordinators - 5 - Desired Proxies - 5 - Desired Logs - 8 + Configuration: + Redundancy mode - triple + Storage engine - ssd-2 + Coordinators - 5 + Desired Proxies - 5 + Desired Logs - 8 -Cluster: - FoundationDB processes - 272 - Machines - 16 - Memory availability - 14.5 GB per process on machine with least available - Retransmissions rate - 20 Hz - Fault Tolerance - 2 machines - Server time - 03/19/18 08:51:52 - -Data: - Replication health - Healthy - Moving data - 0.000 GB - Sum of key-value sizes - 3.298 TB - Disk space used - 15.243 TB - -Operating space: - Storage server - 1656.2 GB free on most full server - Log server - 1794.7 GB free on most full server - -Workload: - Read rate - 55990 Hz - Write rate - 14946 Hz - Transactions started - 6321 Hz - Transactions committed - 1132 Hz - Conflict rate - 0 Hz - -Backup and DR: - Running backups - 1 - Running DRs - 1 as primary - -Client time: 03/19/18 08:51:51 + Cluster: + FoundationDB processes - 272 + Machines - 16 + Memory availability - 14.5 GB per process on machine with least available + Retransmissions rate - 20 Hz + Fault Tolerance - 2 machines + Server time - 03/19/18 08:51:52 + Data: + Replication health - Healthy + Moving data - 0.000 GB + Sum of key-value sizes - 3.298 TB + Disk space used - 15.243 TB + + Operating space: + Storage server - 1656.2 GB free on most full server + Log server - 1794.7 GB free on most full server + + Workload: + Read rate - 55990 Hz + Write rate - 14946 Hz + Transactions started - 6321 Hz + Transactions committed - 1132 Hz + Conflict rate - 0 Hz + + Backup and DR: + Running backups - 1 + Running DRs - 1 as primary + + Client time: 03/19/18 08:51:51 The summary fields are interpreted as follows: @@ -328,131 +327,132 @@ The ``status`` command can provide detailed statistics about the cluster and the fdb> status details -Configuration: - Redundancy mode - triple - Storage engine - ssd-2 - Coordinators - 5 + Configuration: + Redundancy mode - triple + Storage engine - ssd-2 + Coordinators - 5 + + Cluster: + FoundationDB processes - 85 + Machines - 5 + Memory availability - 7.4 GB per process on machine with least available + Retransmissions rate - 5 Hz + Fault Tolerance - 2 machines + Server time - 03/19/18 08:59:37 + + Data: + Replication health - Healthy + Moving data - 0.000 GB + Sum of key-value sizes - 87.068 GB + Disk space used - 327.819 GB + + Operating space: + Storage server - 888.2 GB free on most full server + Log server - 897.3 GB free on most full server + + Workload: + Read rate - 117 Hz + Write rate - 0 Hz + Transactions started - 43 Hz + Transactions committed - 1 Hz + Conflict rate - 0 Hz -Cluster: - 
FoundationDB processes - 85 - Machines - 5 - Memory availability - 7.4 GB per process on machine with least available - Retransmissions rate - 5 Hz - Fault Tolerance - 2 machines - Server time - 03/19/18 08:59:37 + Process performance details: + 10.0.4.1:4500 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.1:4501 ( 1% cpu; 2% machine; 0.010 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4502 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4503 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4504 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4505 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4506 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4507 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4508 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4509 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4510 ( 1% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4511 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4512 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4513 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4514 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.1:4515 ( 12% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.1:4516 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.3 GB / 7.4 GB RAM ) + 10.0.4.2:4500 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.2:4501 ( 15% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4502 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4503 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4504 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4505 ( 18% cpu; 3% machine; 0.124 Gbps; 18% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4506 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4507 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4508 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4509 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4510 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4511 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4512 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.2:4513 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4514 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.2:4515 ( 11% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.2:4516 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) + 10.0.4.3:4500 ( 14% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 3.0 GB / 7.4 GB RAM ) + 10.0.4.3:4501 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) + 10.0.4.3:4502 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) + 10.0.4.3:4503 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4504 ( 7% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4505 ( 2% 
cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4506 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4507 ( 2% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4508 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4509 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4510 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4511 ( 2% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4512 ( 2% cpu; 3% machine; 0.284 Gbps; 3% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4513 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4514 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) + 10.0.4.3:4515 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) + 10.0.4.3:4516 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) + 10.0.4.4:4500 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.4:4501 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4502 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4503 ( 2% cpu; 4% machine; 0.065 Gbps; 16% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4504 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.4:4505 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4506 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4507 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4508 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4509 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4510 ( 24% cpu; 4% machine; 0.065 Gbps; 15% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4511 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) + 10.0.4.4:4512 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.4:4513 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4514 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.4:4515 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.4:4516 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.6 GB / 7.4 GB RAM ) + 10.0.4.5:4500 ( 6% cpu; 2% machine; 0.076 Gbps; 7% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.5:4501 ( 2% cpu; 2% machine; 0.076 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4502 ( 1% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4503 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4504 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4505 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4506 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4507 ( 2% cpu; 2% machine; 0.076 Gbps; 6% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4508 ( 31% cpu; 2% machine; 0.076 Gbps; 8% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4509 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4510 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4511 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4512 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4513 ( 0% cpu; 2% machine; 
0.076 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4514 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.5:4515 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.5:4516 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) -Data: - Replication health - Healthy - Moving data - 0.000 GB - Sum of key-value sizes - 87.068 GB - Disk space used - 327.819 GB + Coordination servers: + 10.0.4.1:4500 (reachable) + 10.0.4.2:4500 (reachable) + 10.0.4.3:4500 (reachable) + 10.0.4.4:4500 (reachable) + 10.0.4.5:4500 (reachable) + + Client time: 03/19/18 08:59:37 -Operating space: - Storage server - 888.2 GB free on most full server - Log server - 897.3 GB free on most full server - -Workload: - Read rate - 117 Hz - Write rate - 0 Hz - Transactions started - 43 Hz - Transactions committed - 1 Hz - Conflict rate - 0 Hz - -Process performance details: - 10.0.4.1:4500 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.1:4501 ( 1% cpu; 2% machine; 0.010 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4502 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4503 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4504 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4505 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4506 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4507 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4508 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4509 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4510 ( 1% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4511 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4512 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4513 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4514 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.1:4515 ( 12% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.1:4516 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.3 GB / 7.4 GB RAM ) - 10.0.4.2:4500 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.2:4501 ( 15% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4502 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4503 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4504 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4505 ( 18% cpu; 3% machine; 0.124 Gbps; 18% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4506 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4507 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4508 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4509 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4510 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4511 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4512 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.2:4513 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4514 ( 0% 
cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.2:4515 ( 11% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.2:4516 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) - 10.0.4.3:4500 ( 14% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 3.0 GB / 7.4 GB RAM ) - 10.0.4.3:4501 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) - 10.0.4.3:4502 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) - 10.0.4.3:4503 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4504 ( 7% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4505 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4506 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4507 ( 2% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4508 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4509 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4510 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4511 ( 2% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4512 ( 2% cpu; 3% machine; 0.284 Gbps; 3% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4513 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4514 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) - 10.0.4.3:4515 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) - 10.0.4.3:4516 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) - 10.0.4.4:4500 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.4:4501 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4502 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4503 ( 2% cpu; 4% machine; 0.065 Gbps; 16% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4504 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.4:4505 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4506 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4507 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4508 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4509 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4510 ( 24% cpu; 4% machine; 0.065 Gbps; 15% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4511 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) - 10.0.4.4:4512 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.4:4513 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4514 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.4:4515 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.4:4516 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.6 GB / 7.4 GB RAM ) - 10.0.4.5:4500 ( 6% cpu; 2% machine; 0.076 Gbps; 7% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.5:4501 ( 2% cpu; 2% machine; 0.076 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4502 ( 1% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4503 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4504 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4505 ( 2% cpu; 2% machine; 
0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4506 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4507 ( 2% cpu; 2% machine; 0.076 Gbps; 6% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4508 ( 31% cpu; 2% machine; 0.076 Gbps; 8% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4509 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4510 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4511 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4512 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4513 ( 0% cpu; 2% machine; 0.076 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4514 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.5:4515 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.5:4516 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) - -Coordination servers: - 10.0.4.1:4500 (reachable) - 10.0.4.2:4500 (reachable) - 10.0.4.3:4500 (reachable) - 10.0.4.4:4500 (reachable) - 10.0.4.5:4500 (reachable) - -Client time: 03/19/18 08:59:37 Several details about individual FoundationDB processes are displayed in a list format in parenthesis after the IP address and port: ======= ========================================================================= diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst index f3b6df0858..8fc3dc3f00 100644 --- a/documentation/sphinx/source/downloads.rst +++ b/documentation/sphinx/source/downloads.rst @@ -10,38 +10,38 @@ macOS The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server. -* `FoundationDB-5.2.4.pkg `_ +* `FoundationDB-5.2.5.pkg `_ Ubuntu ------ The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x. -* `foundationdb-clients-5.2.4-1_amd64.deb `_ -* `foundationdb-server-5.2.4-1_amd64.deb `_ (depends on the clients package) +* `foundationdb-clients-5.2.5-1_amd64.deb `_ +* `foundationdb-server-5.2.5-1_amd64.deb `_ (depends on the clients package) RHEL/CentOS EL6 --------------- The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x. -* `foundationdb-clients-5.2.4-1.el6.x86_64.rpm `_ -* `foundationdb-server-5.2.4-1.el6.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-5.2.5-1.el6.x86_64.rpm `_ +* `foundationdb-server-5.2.5-1.el6.x86_64.rpm `_ (depends on the clients package) RHEL/CentOS EL7 --------------- The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x. -* `foundationdb-clients-5.2.4-1.el7.x86_64.rpm `_ -* `foundationdb-server-5.2.4-1.el7.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-5.2.5-1.el7.x86_64.rpm `_ +* `foundationdb-server-5.2.5-1.el7.x86_64.rpm `_ (depends on the clients package) Windows ------- The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server. 
-* `foundationdb-5.2.4-x64.msi `_ +* `foundationdb-5.2.5-x64.msi `_ API Language Bindings ===================== @@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package: -* `foundationdb-5.2.4.tar.gz `_ +* `foundationdb-5.2.5.tar.gz `_ Ruby 1.9.3/2.0.0+ ----------------- -* `fdb-5.2.4.gem `_ +* `fdb-5.2.5.gem `_ Java 8+ ------- -* `fdb-java-5.2.4.jar `_ -* `fdb-java-5.2.4-javadoc.jar `_ +* `fdb-java-5.2.5.jar `_ +* `fdb-java-5.2.5-javadoc.jar `_ Go 1.1+ ------- diff --git a/documentation/sphinx/source/queues-java.rst b/documentation/sphinx/source/queues-java.rst index 3d5019606d..4e141d0202 100644 --- a/documentation/sphinx/source/queues-java.rst +++ b/documentation/sphinx/source/queues-java.rst @@ -81,19 +81,18 @@ The following is a simple implementation of the basic pattern: // Remove the top element from the queue. public static Object dequeue(TransactionContext tcx){ - final KeyValue item = firstItem(tcx); - if(item == null){ - return null; - } - // Remove from the top of the queue. - tcx.run((Transaction tr) -> { + return tcx.run((Transaction tr) -> { + final KeyValue item = firstItem(tr); + if(item == null){ + return null; + } + tr.clear(item.getKey()); - return null; + // Return the old value. + return Tuple.fromBytes(item.getValue()).get(0); }); - // Return the old value. - return Tuple.fromBytes(item.getValue()).get(0); } // Add an element to the queue. diff --git a/documentation/sphinx/source/tls.rst b/documentation/sphinx/source/tls.rst index d6f903fd86..7c0567fdf6 100644 --- a/documentation/sphinx/source/tls.rst +++ b/documentation/sphinx/source/tls.rst @@ -101,7 +101,7 @@ The default behavior when the certificate or key file is not specified is to loo Default Peer Verification ^^^^^^^^^^^^^^^^^^^^^^^^^ -The default peer verification is ``Check.Valid=0``. +The default peer verification is ``Check.Valid=1``. 
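As a rough illustration of the behavior behind the documentation change above (a sketch, not FoundationDB source): per this diff, TLSOptions now falls back to ``Check.Valid=1`` when neither the API nor the ``FDB_TLS_VERIFY_PEERS`` environment variable supplies a verification string, so peer certificates are validated unless the operator explicitly opts out. The helper name and the use of ``std::getenv`` below are assumptions for illustration only::

    #include <cstdlib>
    #include <iostream>
    #include <string>

    // Hypothetical helper mirroring the default-resolution logic changed in this
    // diff: an explicit FDB_TLS_VERIFY_PEERS setting still wins, but the fallback
    // default is now "Check.Valid=1" instead of the old "Check.Valid=0".
    static std::string resolveVerifyPeers() {
        if (const char* env = std::getenv("FDB_TLS_VERIFY_PEERS"))
            return std::string(env);
        return "Check.Valid=1";
    }

    int main() {
        std::cout << "verify_peers = " << resolveVerifyPeers() << std::endl;
        return 0;
    }
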
Default Password ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index a89783b842..7024155100 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -884,6 +884,12 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, if (statusObjConfig.get("logs", intVal)) outputString += format("\n Desired Logs - %d", intVal); + + if (statusObjConfig.get("remote_logs", intVal)) + outputString += format("\n Desired Remote Logs - %d", intVal); + + if (statusObjConfig.get("log_routers", intVal)) + outputString += format("\n Desired Log Routers - %d", intVal); } catch (std::runtime_error& e) { outputString = outputStringCache; diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index d8f9be4115..3bca9a737a 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -29,7 +29,7 @@ DatabaseConfiguration::DatabaseConfiguration() void DatabaseConfiguration::resetInternal() { // does NOT reset rawConfiguration initialized = false; - masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = -1; + masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = desiredLogRouterCount = -1; tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END; autoMasterProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES; autoResolverCount = CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS; @@ -102,11 +102,19 @@ void parse( std::vector* regions, ValueRef const& v ) { info.satelliteTLogUsableDcs = 2; info.satelliteTLogWriteAntiQuorum = 0; info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + info.satelliteTLogReplicationFactorFallback = 2; + info.satelliteTLogUsableDcsFallback = 1; + info.satelliteTLogWriteAntiQuorumFallback = 0; + info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); } else if(satelliteReplication == "two_satellite_fast") { info.satelliteTLogReplicationFactor = 4; info.satelliteTLogUsableDcs = 2; info.satelliteTLogWriteAntiQuorum = 2; info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + info.satelliteTLogReplicationFactorFallback = 2; + info.satelliteTLogUsableDcsFallback = 1; + info.satelliteTLogWriteAntiQuorumFallback = 0; + info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); } else { throw invalid_option(); } @@ -114,6 +122,9 @@ void parse( std::vector* regions, ValueRef const& v ) { dc.tryGet("satellite_log_replicas", info.satelliteTLogReplicationFactor); dc.tryGet("satellite_usable_dcs", info.satelliteTLogUsableDcs); dc.tryGet("satellite_anti_quorum", info.satelliteTLogWriteAntiQuorum); + dc.tryGet("satellite_log_replicas_fallback", info.satelliteTLogReplicationFactorFallback); + dc.tryGet("satellite_usable_dcs_fallback", info.satelliteTLogUsableDcsFallback); + dc.tryGet("satellite_anti_quorum_fallback", info.satelliteTLogWriteAntiQuorumFallback); regions->push_back(info); } std::sort(regions->begin(), regions->end(), RegionInfo::sort_by_priority() ); @@ -137,6 +148,9 @@ void DatabaseConfiguration::setDefaultReplicationPolicy() { if(r.satelliteTLogReplicationFactor > 0 && !r.satelliteTLogPolicy) { r.satelliteTLogPolicy = IRepPolicyRef(new 
PolicyAcross(r.satelliteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); } + if(r.satelliteTLogReplicationFactorFallback > 0 && !r.satelliteTLogPolicyFallback) { + r.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(r.satelliteTLogReplicationFactorFallback, "zoneid", IRepPolicyRef(new PolicyOne()))); + } } } @@ -172,7 +186,8 @@ bool DatabaseConfiguration::isValid() const { r.satelliteTLogReplicationFactor >= 0 && r.satelliteTLogWriteAntiQuorum >= 0 && r.satelliteTLogUsableDcs >= 1 && - ( r.satelliteTLogReplicationFactor == 0 || ( r.satelliteTLogPolicy && r.satellites.size() ) ) ) ) { + ( r.satelliteTLogReplicationFactor == 0 || ( r.satelliteTLogPolicy && r.satellites.size() ) ) && + ( r.satelliteTLogUsableDcsFallback == 0 || ( r.satelliteTLogReplicationFactor > 0 && r.satelliteTLogReplicationFactorFallback > 0 ) ) ) ) { return false; } dcIds.insert(r.dcId); @@ -201,6 +216,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { result["redundancy_mode"] = "double"; } else if( tLogReplicationFactor == 4 && storageTeamSize == 6 && tlogInfo == "dcid^2 x zoneid^2 x 1" && storageInfo == "dcid^3 x zoneid^2 x 1" ) { result["redundancy_mode"] = "three_datacenter"; + } else if( tLogReplicationFactor == 4 && storageTeamSize == 4 && tlogInfo == "dcid^2 x zoneid^2 x 1" && storageInfo == "dcid^2 x zoneid^2 x 1" ) { + result["redundancy_mode"] = "three_datacenter_fallback"; } else if( tLogReplicationFactor == 3 && storageTeamSize == 3 ) { result["redundancy_mode"] = "triple"; } else if( tLogReplicationFactor == 4 && storageTeamSize == 3 && tlogInfo == "data_hall^2 x zoneid^2 x 1" && storageInfo == "data_hall^3 x 1" ) { @@ -254,21 +271,25 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { dcObj["priority"] = r.priority; dcArr.push_back(dcObj); - if(r.satelliteTLogReplicationFactor == 1 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) { + if(r.satelliteTLogReplicationFactor == 1 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 0) { regionObj["satellite_redundancy_mode"] = "one_satellite_single"; - } else if(r.satelliteTLogReplicationFactor == 2 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) { + } else if(r.satelliteTLogReplicationFactor == 2 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 0) { regionObj["satellite_redundancy_mode"] = "one_satellite_double"; - } else if(r.satelliteTLogReplicationFactor == 3 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) { + } else if(r.satelliteTLogReplicationFactor == 3 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 0) { regionObj["satellite_redundancy_mode"] = "one_satellite_triple"; - } else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 0) { + } else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 0 && r.satelliteTLogUsableDcsFallback == 1 && r.satelliteTLogReplicationFactorFallback == 2 && r.satelliteTLogWriteAntiQuorumFallback == 0) { regionObj["satellite_redundancy_mode"] = "two_satellite_safe"; - } else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 2) { + } else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && 
r.satelliteTLogWriteAntiQuorum == 2 && r.satelliteTLogUsableDcsFallback == 1 && r.satelliteTLogReplicationFactorFallback == 2 && r.satelliteTLogWriteAntiQuorumFallback == 0) { regionObj["satellite_redundancy_mode"] = "two_satellite_fast"; } else if(r.satelliteTLogReplicationFactor != 0) { regionObj["satellite_log_replicas"] = r.satelliteTLogReplicationFactor; regionObj["satellite_usable_dcs"] = r.satelliteTLogUsableDcs; regionObj["satellite_anti_quorum"] = r.satelliteTLogWriteAntiQuorum; if(r.satelliteTLogPolicy) regionObj["satellite_log_policy"] = r.satelliteTLogPolicy->info(); + regionObj["satellite_log_replicas_fallback"] = r.satelliteTLogReplicationFactorFallback; + regionObj["satellite_usable_dcs_fallback"] = r.satelliteTLogUsableDcsFallback; + regionObj["satellite_anti_quorum_fallback"] = r.satelliteTLogWriteAntiQuorumFallback; + if(r.satelliteTLogPolicyFallback) regionObj["satellite_log_policy_fallback"] = r.satelliteTLogPolicyFallback->info(); } if( r.satelliteDesiredTLogCount != -1 ) { @@ -301,6 +322,9 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if( resolverCount != -1 ) { result["resolvers"] = resolverCount; } + if( desiredLogRouterCount != -1 ) { + result["log_routers"] = desiredLogRouterCount; + } if( remoteDesiredTLogCount != -1 ) { result["remote_logs"] = remoteDesiredTLogCount; } @@ -344,6 +368,7 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { else if (ck == LiteralStringRef("auto_logs")) parse(&autoDesiredTLogCount, value); else if (ck == LiteralStringRef("storage_replication_policy")) parseReplicationPolicy(&storagePolicy, value); else if (ck == LiteralStringRef("log_replication_policy")) parseReplicationPolicy(&tLogPolicy, value); + else if (ck == LiteralStringRef("log_routers")) parse(&desiredLogRouterCount, value); else if (ck == LiteralStringRef("remote_logs")) parse(&remoteDesiredTLogCount, value); else if (ck == LiteralStringRef("remote_log_replicas")) parse(&remoteTLogReplicationFactor, value); else if (ck == LiteralStringRef("remote_log_policy")) parseReplicationPolicy(&remoteTLogPolicy, value); diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 4c16b41ad5..6e5924d455 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -55,9 +55,15 @@ struct RegionInfo { int32_t satelliteTLogWriteAntiQuorum; int32_t satelliteTLogUsableDcs; + IRepPolicyRef satelliteTLogPolicyFallback; + int32_t satelliteTLogReplicationFactorFallback; + int32_t satelliteTLogWriteAntiQuorumFallback; + int32_t satelliteTLogUsableDcsFallback; + std::vector satellites; - RegionInfo() : priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0), satelliteTLogUsableDcs(1) {} + RegionInfo() : priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0), satelliteTLogUsableDcs(1), + satelliteTLogReplicationFactorFallback(0), satelliteTLogWriteAntiQuorumFallback(0), satelliteTLogUsableDcsFallback(0) {} struct sort_by_priority { bool operator ()(RegionInfo const&a, RegionInfo const& b) const { return a.priority > b.priority; } @@ -65,7 +71,8 @@ struct RegionInfo { template void serialize(Ar& ar) { - ar & dcId & priority & satelliteTLogPolicy & satelliteDesiredTLogCount & satelliteTLogReplicationFactor & satelliteTLogWriteAntiQuorum & satelliteTLogUsableDcs & satellites; + ar & dcId & priority & satelliteTLogPolicy & satelliteDesiredTLogCount & satelliteTLogReplicationFactor & 
satelliteTLogWriteAntiQuorum & satelliteTLogUsableDcs & + satelliteTLogPolicyFallback & satelliteTLogReplicationFactorFallback & satelliteTLogWriteAntiQuorumFallback & satelliteTLogUsableDcsFallback & satellites; } }; @@ -115,12 +122,12 @@ struct DatabaseConfiguration { } return minRequired; } - int32_t minMachinesRequiredPerDatacenter() const { + int32_t minMachinesRequiredPerDatacenter() const { int minRequired = std::max( remoteTLogReplicationFactor, std::max(tLogReplicationFactor, storageTeamSize) ); for(auto& r : regions) { minRequired = std::max( minRequired, r.satelliteTLogReplicationFactor/std::max(1, r.satelliteTLogUsableDcs) ); } - return minRequired; + return minRequired; } //Killing an entire datacenter counts as killing one machine in modes that support it @@ -128,6 +135,9 @@ struct DatabaseConfiguration { int worstSatellite = regions.size() ? std::numeric_limits::max() : 0; for(auto& r : regions) { worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum); + if(r.satelliteTLogUsableDcsFallback > 0) { + worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback); + } } if(usableRegions > 1 && worstSatellite > 0) { return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1); @@ -159,6 +169,7 @@ struct DatabaseConfiguration { KeyValueStoreType storageServerStoreType; // Remote TLogs + int32_t desiredLogRouterCount; int32_t remoteDesiredTLogCount; int32_t remoteTLogReplicationFactor; IRepPolicyRef remoteTLogPolicy; diff --git a/fdbclient/FDBTypes.h b/fdbclient/FDBTypes.h index afefef3782..35bad29892 100644 --- a/fdbclient/FDBTypes.h +++ b/fdbclient/FDBTypes.h @@ -604,7 +604,7 @@ static bool addressExcluded( std::set const& exclusions, Netwo struct ClusterControllerPriorityInfo { enum DCFitness { FitnessPrimary, FitnessRemote, FitnessPreferred, FitnessUnknown, FitnessBad }; //cannot be larger than 7 because of leader election mask - static DCFitness calculateDCFitness(Optional dcId, vector> dcPriority) { + static DCFitness calculateDCFitness(Optional const& dcId, vector> const& dcPriority) { if(!dcPriority.size()) { return FitnessUnknown; } else if(dcPriority.size() == 1) { diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index 34320fd380..a658e85f51 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -35,8 +35,8 @@ ClientKnobs::ClientKnobs(bool randomize) { init( SYSTEM_MONITOR_INTERVAL, 5.0 ); - init( FAILURE_MAX_DELAY, 10.0 ); if( randomize && BUGGIFY ) FAILURE_MAX_DELAY = 5.0; - init( FAILURE_MIN_DELAY, 5.0 ); if( randomize && BUGGIFY ) FAILURE_MIN_DELAY = 2.0; + init( FAILURE_MAX_DELAY, 5.0 ); + init( FAILURE_MIN_DELAY, 4.0 ); if( randomize && BUGGIFY ) FAILURE_MIN_DELAY = 1.0; init( FAILURE_TIMEOUT_DELAY, FAILURE_MIN_DELAY ); init( CLIENT_FAILURE_TIMEOUT_DELAY, FAILURE_MIN_DELAY ); @@ -168,6 +168,11 @@ ClientKnobs::ClientKnobs(bool randomize) { init( BLOBSTORE_MAX_SEND_BYTES_PER_SECOND, 1e9 ); init( BLOBSTORE_MAX_RECV_BYTES_PER_SECOND, 1e9 ); + init( BLOBSTORE_LIST_REQUESTS_PER_SECOND, 25 ); + init( BLOBSTORE_WRITE_REQUESTS_PER_SECOND, 50 ); + init( BLOBSTORE_READ_REQUESTS_PER_SECOND, 100 ); + init( BLOBSTORE_DELETE_REQUESTS_PER_SECOND, 200 ); + // Client Status Info init(CSI_SAMPLING_PROBABILITY, -1.0); init(CSI_SIZE_LIMIT, std::numeric_limits::max()); diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 44fbf8337d..5ee8d3baa4 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h 
@@ -155,6 +155,10 @@ public: int BLOBSTORE_REQUEST_TRIES; int BLOBSTORE_REQUEST_TIMEOUT; int BLOBSTORE_REQUESTS_PER_SECOND; + int BLOBSTORE_LIST_REQUESTS_PER_SECOND; + int BLOBSTORE_WRITE_REQUESTS_PER_SECOND; + int BLOBSTORE_READ_REQUESTS_PER_SECOND; + int BLOBSTORE_DELETE_REQUESTS_PER_SECOND; int BLOBSTORE_CONCURRENT_REQUESTS; int BLOBSTORE_MULTIPART_MAX_PART_SIZE; int BLOBSTORE_MULTIPART_MIN_PART_SIZE; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 4aa55c2446..1f73069870 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -65,14 +65,14 @@ std::map configForToken( std::string const& mode ) { std::string key = mode.substr(0, pos); std::string value = mode.substr(pos+1); - if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "satellite_logs" || key == "usable_regions") && isInteger(value) ) { + if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "log_routers" || key == "satellite_logs" || key == "usable_regions") && isInteger(value) ) { out[p+key] = value; } if( key == "regions" ) { json_spirit::mValue mv; json_spirit::read_string( value, mv ); - + StatusObject regionObj; regionObj["regions"] = mv; out[p+key] = BinaryWriter::toValue(regionObj, IncludeVersion()).toString(); @@ -125,6 +125,10 @@ std::map configForToken( std::string const& mode ) { tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) )); + } else if(mode == "three_datacenter_fallback") { + redundancy="4"; + log_replicas="4"; + storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); } else if(mode == "three_data_hall") { redundancy="3"; log_replicas="4"; @@ -340,6 +344,9 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { } else if( result.old_replication == "three_datacenter" ) { storage_replication = 6; log_replication = 4; + } else if( result.old_replication == "three_datacenter_fallback" ) { + storage_replication = 4; + log_replication = 4; } else return ConfigureAutoResult(); diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 373d0cf7f8..5b3671f29d 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -40,6 +40,7 @@ #include "flow/Knobs.h" #include "fdbclient/Knobs.h" #include "fdbrpc/Net2FileSystem.h" +#include "fdbrpc/simulator.h" #include @@ -3013,7 +3014,7 @@ void Transaction::checkDeferredError() { cx->checkDeferredError(); } Reference Transaction::createTrLogInfoProbabilistically(const Database &cx) { double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) ? 
CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : cx->clientInfo->get().clientTxnInfoSampleRate; - if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && g_random->random01() < clientSamplingProbability) + if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && g_random->random01() < clientSamplingProbability && (!g_network->isSimulated() || !g_simulator.speedUpSimulation)) return Reference(new TransactionLogInfo()); else return Reference(); diff --git a/fdbrpc/BlobStore.actor.cpp b/fdbrpc/BlobStore.actor.cpp index f1ab6a95d3..f8229e4cca 100644 --- a/fdbrpc/BlobStore.actor.cpp +++ b/fdbrpc/BlobStore.actor.cpp @@ -57,6 +57,10 @@ BlobStoreEndpoint::BlobKnobs::BlobKnobs() { request_timeout = CLIENT_KNOBS->BLOBSTORE_REQUEST_TIMEOUT; requests_per_second = CLIENT_KNOBS->BLOBSTORE_REQUESTS_PER_SECOND; concurrent_requests = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_REQUESTS; + list_requests_per_second = CLIENT_KNOBS->BLOBSTORE_LIST_REQUESTS_PER_SECOND; + write_requests_per_second = CLIENT_KNOBS->BLOBSTORE_WRITE_REQUESTS_PER_SECOND; + read_requests_per_second = CLIENT_KNOBS->BLOBSTORE_READ_REQUESTS_PER_SECOND; + delete_requests_per_second = CLIENT_KNOBS->BLOBSTORE_DELETE_REQUESTS_PER_SECOND; multipart_max_part_size = CLIENT_KNOBS->BLOBSTORE_MULTIPART_MAX_PART_SIZE; multipart_min_part_size = CLIENT_KNOBS->BLOBSTORE_MULTIPART_MIN_PART_SIZE; concurrent_uploads = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_UPLOADS; @@ -79,6 +83,10 @@ bool BlobStoreEndpoint::BlobKnobs::set(StringRef name, int value) { TRY_PARAM(request_tries, rt); TRY_PARAM(request_timeout, rto); TRY_PARAM(requests_per_second, rps); + TRY_PARAM(list_requests_per_second, lrps); + TRY_PARAM(write_requests_per_second, wrps); + TRY_PARAM(read_requests_per_second, rrps); + TRY_PARAM(delete_requests_per_second, drps); TRY_PARAM(concurrent_requests, cr); TRY_PARAM(multipart_max_part_size, maxps); TRY_PARAM(multipart_min_part_size, minps); @@ -107,6 +115,10 @@ std::string BlobStoreEndpoint::BlobKnobs::getURLParameters() const { _CHECK_PARAM(request_tries, rt); _CHECK_PARAM(request_timeout, rto); _CHECK_PARAM(requests_per_second, rps); + _CHECK_PARAM(list_requests_per_second, lrps); + _CHECK_PARAM(write_requests_per_second, wrps); + _CHECK_PARAM(read_requests_per_second, rrps); + _CHECK_PARAM(delete_requests_per_second, drps); _CHECK_PARAM(concurrent_requests, cr); _CHECK_PARAM(multipart_max_part_size, maxps); _CHECK_PARAM(multipart_min_part_size, minps); @@ -195,6 +207,8 @@ std::string BlobStoreEndpoint::getResourceURL(std::string resource) { } ACTOR Future objectExists_impl(Reference b, std::string bucket, std::string object) { + Void _ = wait(b->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; @@ -207,6 +221,8 @@ Future BlobStoreEndpoint::objectExists(std::string const &bucket, std::str } ACTOR Future deleteObject_impl(Reference b, std::string bucket, std::string object) { + Void _ = wait(b->requestRateDelete->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; Reference r = wait(b->doRequest("DELETE", resource, headers, NULL, 0, {200, 204, 404})); @@ -273,9 +289,10 @@ Future BlobStoreEndpoint::deleteRecursively(std::string const &bucket, std } ACTOR Future createBucket_impl(Reference b, std::string bucket) { + Void _ = wait(b->requestRateWrite->getAllowance(1)); + std::string resource = std::string("/") + bucket; HTTP::Headers headers; - Reference r = 
wait(b->doRequest("PUT", resource, headers, NULL, 0, {200, 409})); return Void(); } @@ -285,6 +302,8 @@ Future BlobStoreEndpoint::createBucket(std::string const &bucket) { } ACTOR Future objectSize_impl(Reference b, std::string bucket, std::string object) { + Void _ = wait(b->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; @@ -789,6 +808,8 @@ void BlobStoreEndpoint::setAuthHeaders(std::string const &verb, std::string cons } ACTOR Future readEntireFile_impl(Reference bstore, std::string bucket, std::string object) { + Void _ = wait(bstore->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; Reference r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 404})); @@ -805,6 +826,7 @@ ACTOR Future writeEntireFileFromBuffer_impl(Reference b if(contentLen > bstore->knobs.multipart_max_part_size) throw file_too_large(); + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); Void _ = wait(bstore->concurrentUploads.take()); state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1); @@ -856,6 +878,8 @@ Future BlobStoreEndpoint::writeEntireFileFromBuffer(std::string const &buc ACTOR Future readObject_impl(Reference bstore, std::string bucket, std::string object, void *data, int length, int64_t offset) { if(length <= 0) return 0; + Void _ = wait(bstore->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1); @@ -874,6 +898,8 @@ Future BlobStoreEndpoint::readObject(std::string const &bucket, std::string } ACTOR static Future beginMultiPartUpload_impl(Reference bstore, std::string bucket, std::string object) { + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object + "?uploads"; HTTP::Headers headers; Reference r = wait(bstore->doRequest("POST", resource, headers, NULL, 0, {200})); @@ -892,6 +918,7 @@ Future BlobStoreEndpoint::beginMultiPartUpload(std::string const &b } ACTOR Future uploadPart_impl(Reference bstore, std::string bucket, std::string object, std::string uploadID, unsigned int partNumber, UnsentPacketQueue *pContent, int contentLen, std::string contentMD5) { + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); Void _ = wait(bstore->concurrentUploads.take()); state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1); @@ -921,6 +948,7 @@ Future BlobStoreEndpoint::uploadPart(std::string const &bucket, std ACTOR Future finishMultiPartUpload_impl(Reference bstore, std::string bucket, std::string object, std::string uploadID, BlobStoreEndpoint::MultiPartSetT parts) { state UnsentPacketQueue part_list(); // NonCopyable state var so must be declared at top of actor + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); std::string manifest = ""; for(auto &p : parts) diff --git a/fdbrpc/BlobStore.h b/fdbrpc/BlobStore.h index 000f0569d0..8adfbe83a2 100644 --- a/fdbrpc/BlobStore.h +++ b/fdbrpc/BlobStore.h @@ -55,6 +55,10 @@ public: request_tries, request_timeout, requests_per_second, + list_requests_per_second, + write_requests_per_second, + read_requests_per_second, + delete_requests_per_second, multipart_max_part_size, multipart_min_part_size, concurrent_requests, @@ -78,6 +82,10 @@ public: "request_tries (or rt) Number of times to try each request until a parseable HTTP response other 
than 429 is received.", "request_timeout (or rto) Number of seconds to wait for a request to succeed after a connection is established.", "requests_per_second (or rps) Max number of requests to start per second.", + "list_requests_per_second (or lrps) Max number of list requests to start per second.", + "write_requests_per_second (or wrps) Max number of write requests to start per second.", + "read_requests_per_second (or rrps) Max number of read requests to start per second.", + "delete_requests_per_second (or drps) Max number of delete requests to start per second.", "multipart_max_part_size (or maxps) Max part size for multipart uploads.", "multipart_min_part_size (or minps) Min part size for multipart uploads.", "concurrent_requests (or cr) Max number of total requests in progress at once, regardless of operation-specific concurrency limits.", @@ -97,6 +105,10 @@ public: BlobStoreEndpoint(std::string const &host, std::string service, std::string const &key, std::string const &secret, BlobKnobs const &knobs = BlobKnobs()) : host(host), service(service), key(key), secret(secret), lookupSecret(secret.empty()), knobs(knobs), requestRate(new SpeedLimit(knobs.requests_per_second, 1)), + requestRateList(new SpeedLimit(knobs.list_requests_per_second, 1)), + requestRateWrite(new SpeedLimit(knobs.write_requests_per_second, 1)), + requestRateRead(new SpeedLimit(knobs.read_requests_per_second, 1)), + requestRateDelete(new SpeedLimit(knobs.delete_requests_per_second, 1)), sendRate(new SpeedLimit(knobs.max_send_bytes_per_second, 1)), recvRate(new SpeedLimit(knobs.max_recv_bytes_per_second, 1)), concurrentRequests(knobs.concurrent_requests), @@ -135,6 +147,10 @@ public: // Speed and concurrency limits Reference requestRate; + Reference requestRateList; + Reference requestRateWrite; + Reference requestRateRead; + Reference requestRateDelete; Reference sendRate; Reference recvRate; FlowLock concurrentRequests; diff --git a/fdbrpc/Locality.cpp b/fdbrpc/Locality.cpp index 7076934102..f528ade376 100644 --- a/fdbrpc/Locality.cpp +++ b/fdbrpc/Locality.cpp @@ -137,6 +137,8 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons return ProcessClass::OkayFit; case ProcessClass::ProxyClass: return ProcessClass::OkayFit; + case ProcessClass::LogRouterClass: + return ProcessClass::OkayFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; case ProcessClass::TesterClass: diff --git a/fdbrpc/Locality.h b/fdbrpc/Locality.h index 19b4eb24b6..7b404413e9 100644 --- a/fdbrpc/Locality.h +++ b/fdbrpc/Locality.h @@ -27,7 +27,7 @@ struct ProcessClass { // This enum is stored in restartInfo.ini for upgrade tests, so be very careful about changing the existing items! 
enum ClassType { UnsetClass, StorageClass, TransactionClass, ResolutionClass, TesterClass, ProxyClass, MasterClass, StatelessClass, LogClass, ClusterControllerClass, LogRouterClass, InvalidClass = -1 }; - enum Fitness { BestFit, GoodFit, OkayFit, UnsetFit, WorstFit, ExcludeFit, NeverAssign }; //cannot be larger than 7 because of leader election mask + enum Fitness { BestFit, GoodFit, UnsetFit, OkayFit, WorstFit, ExcludeFit, NeverAssign }; //cannot be larger than 7 because of leader election mask enum ClusterRole { Storage, TLog, Proxy, Master, Resolver, LogRouter, ClusterController }; enum ClassSource { CommandLineSource, AutoSource, DBSource, InvalidSource = -1 }; int16_t _class; diff --git a/fdbrpc/TLSConnection.actor.cpp b/fdbrpc/TLSConnection.actor.cpp index 3c09128101..45b04f097b 100644 --- a/fdbrpc/TLSConnection.actor.cpp +++ b/fdbrpc/TLSConnection.actor.cpp @@ -91,7 +91,7 @@ TLSConnection::TLSConnection( Reference const& conn, Referencecreate_session // to have used its provided logging function to have logged // the error - throw internal_error(); + throw tls_error(); } handshook = handshake(this); } @@ -325,7 +325,7 @@ Reference TLSOptions::get_policy(PolicyType type) { if (platform::getEnvironmentVar("FDB_TLS_VERIFY_PEERS", verifyPeerString)) set_verify_peers({ verifyPeerString }); else - set_verify_peers({ std::string("Check.Valid=0")}); + set_verify_peers({ std::string("Check.Valid=1")}); } if (!ca_set) { std::string caFile; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 392835214f..11f5683121 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -339,6 +339,43 @@ public: return results; } + //FIXME: This logic will fallback unnecessarily when usable dcs > 1 because it does not check all combinations of potential satellite locations + std::vector> getWorkersForSatelliteLogs( const DatabaseConfiguration& conf, const RegionInfo& region, std::map< Optional>, int>& id_used, bool& satelliteFallback, bool checkStable = false ) { + int startDC = 0; + loop { + if(startDC > 0 && startDC >= region.satellites.size() + 1 - (satelliteFallback ? region.satelliteTLogUsableDcsFallback : region.satelliteTLogUsableDcs)) { + if(satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) { + throw no_more_servers(); + } else { + if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) { + throw operation_failed(); + } + satelliteFallback = true; + startDC = 0; + } + } + + try { + std::set> satelliteDCs; + for(int s = startDC; s < std::min(startDC + (satelliteFallback ? 
region.satelliteTLogUsableDcsFallback : region.satelliteTLogUsableDcs), region.satellites.size()); s++) { + satelliteDCs.insert(region.satellites[s].dcId); + } + + if(satelliteFallback) { + return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactorFallback, conf.getDesiredSatelliteLogs(region.dcId)*region.satelliteTLogUsableDcsFallback/region.satelliteTLogUsableDcs, region.satelliteTLogPolicyFallback, id_used, checkStable, satelliteDCs ); + } else { + return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactor, conf.getDesiredSatelliteLogs(region.dcId), region.satelliteTLogPolicy, id_used, checkStable, satelliteDCs ); + } + } catch (Error &e) { + if(e.code() != error_code_no_more_servers) { + throw; + } + } + + startDC++; + } + } + WorkerFitnessInfo getWorkerForRoleInDatacenter(Optional> const& dcId, ProcessClass::ClusterRole role, ProcessClass::Fitness unacceptableFitness, DatabaseConfiguration const& conf, std::map< Optional>, int>& id_used, bool checkStable = false ) { std::map, vector>> fitness_workers; @@ -439,7 +476,14 @@ public: return false; } + bool betterCount (RoleFitness const& r) const { + if(count > r.count) return true; + return worstFit < r.worstFit; + } + bool operator == (RoleFitness const& r) const { return worstFit == r.worstFit && bestFit == r.bestFit && count == r.count; } + + std::string toString() const { return format("%d %d &d", bestFit, worstFit, count); } }; std::set>> getDatacenters( DatabaseConfiguration const& conf, bool checkStable = false ) { @@ -459,7 +503,6 @@ public: std::set> remoteDC; remoteDC.insert(req.dcId); - auto remoteLogs = getWorkersForTlogs( req.configuration, req.configuration.getRemoteTLogReplicationFactor(), req.configuration.getDesiredRemoteLogs(), req.configuration.getRemoteTLogPolicy(), id_used, false, remoteDC ); for(int i = 0; i < remoteLogs.size(); i++) { @@ -471,9 +514,13 @@ public: result.logRouters.push_back(logRouters[i].first); } - if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && - ( ( RoleFitness(remoteLogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) ) || - ( RoleFitness(logRouters, ProcessClass::LogRouter) > RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount) ) ) ) { + if(!remoteStartTime.present()) { + remoteStartTime = now(); + } + + if( now() - remoteStartTime.get() < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && + ( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs()).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) || + ( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) { throw operation_failed(); } @@ -487,11 +534,11 @@ public: id_used[clusterControllerProcessId]++; ASSERT(dcId.present()); - + std::set> primaryDC; primaryDC.insert(dcId); result.dcId = dcId; - + RegionInfo region; for(auto& r : req.configuration.regions) { if(r.dcId == dcId.get()) { @@ -499,14 +546,14 @@ public: break; } } - + if(req.recruitSeedServers) { auto primaryStorageServers = getWorkersForSeedServers( req.configuration, req.configuration.storagePolicy, dcId ); for(int i = 0; i < primaryStorageServers.size(); i++) { result.storageServers.push_back(primaryStorageServers[i].first); } } - + auto tlogs = getWorkersForTlogs( req.configuration, req.configuration.tLogReplicationFactor, req.configuration.getDesiredLogs(), req.configuration.tLogPolicy, id_used, false, 
primaryDC ); for(int i = 0; i < tlogs.size(); i++) { result.tLogs.push_back(tlogs[i].first); @@ -514,31 +561,9 @@ public: std::vector> satelliteLogs; if(region.satelliteTLogReplicationFactor > 0) { - int startDC = 0; - loop { - if(startDC > 0 && startDC >= region.satellites.size() + 1 - region.satelliteTLogUsableDcs) { - throw no_more_servers(); - } - - try { - std::set> satelliteDCs; - for(int s = startDC; s < std::min(startDC + region.satelliteTLogUsableDcs, region.satellites.size()); s++) { - satelliteDCs.insert(region.satellites[s].dcId); - } - - satelliteLogs = getWorkersForTlogs( req.configuration, region.satelliteTLogReplicationFactor, req.configuration.getDesiredSatelliteLogs(dcId), region.satelliteTLogPolicy, id_used, false, satelliteDCs ); - - for(int i = 0; i < satelliteLogs.size(); i++) { - result.satelliteTLogs.push_back(satelliteLogs[i].first); - } - break; - } catch (Error &e) { - if(e.code() != error_code_no_more_servers) { - throw; - } - } - - startDC++; + satelliteLogs = getWorkersForSatelliteLogs( req.configuration, region, id_used, result.satelliteFallback ); + for(int i = 0; i < satelliteLogs.size(); i++) { + result.satelliteTLogs.push_back(satelliteLogs[i].first); } } @@ -562,10 +587,10 @@ public: } if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && - ( RoleFitness(tlogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) || - ( region.satelliteTLogReplicationFactor > 0 && RoleFitness(satelliteLogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId)) ) || - RoleFitness(proxies, ProcessClass::Proxy) > RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies()) || - RoleFitness(resolvers, ProcessClass::Resolver) > RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers()) ) ) { + ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || + ( region.satelliteTLogReplicationFactor > 0 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId)).betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog)) ) || + RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies()).betterCount(RoleFitness(proxies, ProcessClass::Proxy)) || + RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers()).betterCount(RoleFitness(resolvers, ProcessClass::Resolver)) ) ) { return operation_failed(); } @@ -593,7 +618,11 @@ public: } throw no_more_servers(); } catch( Error& e ) { - if (e.code() != error_code_no_more_servers || regions[1].priority < 0 || now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) { + if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && (!clusterControllerDcId.present() || regions[1].dcId != clusterControllerDcId.get())) { + throw operation_failed(); + } + + if (e.code() != error_code_no_more_servers || regions[1].priority < 0) { throw; } TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", id).error(e); @@ -703,8 +732,8 @@ public: .detail("DesiredResolvers", req.configuration.getDesiredResolvers()).detail("ActualResolvers", result.resolvers.size()); if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && - ( RoleFitness(tlogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, 
req.configuration.getDesiredLogs()) || - bestFitness > RoleFitness(std::min(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), std::max(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), req.configuration.getDesiredProxies()+req.configuration.getDesiredResolvers()) ) ) { + ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || + RoleFitness(std::min(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), std::max(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), req.configuration.getDesiredProxies()+req.configuration.getDesiredResolvers()).betterCount(bestFitness) ) ) { throw operation_failed(); } @@ -716,22 +745,19 @@ public: if(desiredDcIds.get().present() && desiredDcIds.get().get().size() == 2 && desiredDcIds.get().get()[0].get() == regions[0].dcId && desiredDcIds.get().get()[1].get() == regions[1].dcId) { return; } - + try { std::map< Optional>, int> id_used; getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::ClusterController, ProcessClass::ExcludeFit, db.config, id_used, true); getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::Master, ProcessClass::ExcludeFit, db.config, id_used, true); - + std::set> primaryDC; primaryDC.insert(regions[0].dcId); getWorkersForTlogs(db.config, db.config.tLogReplicationFactor, db.config.desiredTLogCount, db.config.tLogPolicy, id_used, true, primaryDC); - + if(regions[0].satelliteTLogReplicationFactor > 0) { - std::set> satelliteDCs; - for(auto &s : regions[0].satellites) { - satelliteDCs.insert(s.dcId); - } - getWorkersForTlogs(db.config, regions[0].satelliteTLogReplicationFactor, db.config.getDesiredSatelliteLogs(regions[0].dcId), regions[0].satelliteTLogPolicy, id_used, true, satelliteDCs); + bool satelliteFallback = false; + getWorkersForSatelliteLogs(db.config, regions[0], id_used, satelliteFallback, true); } getWorkerForRoleInDatacenter( regions[0].dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true ); @@ -856,7 +882,6 @@ public: return true; std::set> primaryDC; - std::set> satelliteDCs; std::set> remoteDC; RegionInfo region; @@ -871,12 +896,6 @@ public: region = r; } } - - if(region.satelliteTLogReplicationFactor > 0) { - for(auto &s : region.satellites) { - satelliteDCs.insert(s.dcId); - } - } } // Check tLog fitness @@ -885,23 +904,38 @@ public: if(oldTLogFit < newTLogFit) return false; - RoleFitness oldSatelliteTLogFit(satellite_tlogs, ProcessClass::TLog); - RoleFitness newSatelliteTLogFit(region.satelliteTLogReplicationFactor > 0 ? getWorkersForTlogs(db.config, region.satelliteTLogReplicationFactor, db.config.getDesiredSatelliteLogs(clusterControllerDcId), region.satelliteTLogPolicy, id_used, true, satelliteDCs) : satellite_tlogs, ProcessClass::TLog); + bool oldSatelliteFallback = false; + for(auto& logSet : dbi.logSystemConfig.tLogs) { + if(logSet.isLocal && logSet.locality == tagLocalitySatellite) { + oldSatelliteFallback = logSet.tLogPolicy->info() != region.satelliteTLogPolicy->info(); + ASSERT(!oldSatelliteFallback || logSet.tLogPolicy->info() == region.satelliteTLogPolicyFallback->info()); + break; + } + } - if(oldSatelliteTLogFit < newSatelliteTLogFit) return false; + RoleFitness oldSatelliteTLogFit(satellite_tlogs, ProcessClass::TLog); + bool newSatelliteFallback = false; + RoleFitness newSatelliteTLogFit(region.satelliteTLogReplicationFactor > 0 ? 
getWorkersForSatelliteLogs(db.config, region, id_used, newSatelliteFallback, true) : satellite_tlogs, ProcessClass::TLog); + + if(oldSatelliteTLogFit < newSatelliteTLogFit) + return false; + if(!oldSatelliteFallback && newSatelliteFallback) + return false; RoleFitness oldRemoteTLogFit(remote_tlogs, ProcessClass::TLog); RoleFitness newRemoteTLogFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForTlogs(db.config, db.config.getRemoteTLogReplicationFactor(), db.config.getDesiredRemoteLogs(), db.config.getRemoteTLogPolicy(), id_used, true, remoteDC) : remote_tlogs, ProcessClass::TLog); if(oldRemoteTLogFit < newRemoteTLogFit) return false; + int oldRouterCount = oldTLogFit.count * std::max(1, db.config.desiredLogRouterCount / std::max(1,oldTLogFit.count)); + int newRouterCount = newTLogFit.count * std::max(1, db.config.desiredLogRouterCount / std::max(1,newTLogFit.count)); RoleFitness oldLogRoutersFit(log_routers, ProcessClass::LogRouter); - RoleFitness newLogRoutersFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newTLogFit.count, db.config, id_used, Optional(), true ) : log_routers, ProcessClass::LogRouter); + RoleFitness newLogRoutersFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newRouterCount, db.config, id_used, Optional(), true ) : log_routers, ProcessClass::LogRouter); - if(oldLogRoutersFit.count < oldTLogFit.count) { + if(oldLogRoutersFit.count < oldRouterCount) { oldLogRoutersFit.worstFit = ProcessClass::NeverAssign; } - if(newLogRoutersFit.count < newTLogFit.count) { + if(newLogRoutersFit.count < newRouterCount) { newLogRoutersFit.worstFit = ProcessClass::NeverAssign; } @@ -922,14 +956,14 @@ public: if(oldInFit.betterFitness(newInFit)) return false; - if(oldTLogFit > newTLogFit || oldInFit > newInFit || oldSatelliteTLogFit > newSatelliteTLogFit || oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit) { + if(oldTLogFit > newTLogFit || oldInFit > newInFit || (oldSatelliteFallback && !newSatelliteFallback) || oldSatelliteTLogFit > newSatelliteTLogFit || oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit) { TraceEvent("BetterMasterExists", id).detail("OldMasterFit", oldMasterFit).detail("NewMasterFit", mworker.fitness) - .detail("OldTLogFitC", oldTLogFit.count).detail("NewTLogFitC", newTLogFit.count) - .detail("OldTLogWorstFitT", oldTLogFit.worstFit).detail("NewTLogWorstFitT", newTLogFit.worstFit) - .detail("OldTLogBestFitT", oldTLogFit.bestFit).detail("NewTLogBestFitT", newTLogFit.bestFit) - .detail("OldInFitW", oldInFit.worstFit).detail("NewInFitW", newInFit.worstFit) - .detail("OldInFitB", oldInFit.bestFit).detail("NewInFitB", newInFit.bestFit) - .detail("OldInFitC", oldInFit.count).detail("NewInFitC", newInFit.count); + .detail("OldTLogFit", oldTLogFit.toString()).detail("NewTLogFit", newTLogFit.toString()) + .detail("OldInFit", oldInFit.toString()).detail("NewInFit", newInFit.toString()) + .detail("OldSatelliteFit", oldSatelliteTLogFit.toString()).detail("NewSatelliteFit", newSatelliteTLogFit.toString()) + .detail("OldRemoteFit", oldRemoteTLogFit.toString()).detail("NewRemoteFit", newRemoteTLogFit.toString()) + .detail("OldRouterFit", oldLogRoutersFit.toString()).detail("NewRouterFit", newLogRoutersFit.toString()) + .detail("OldSatelliteFallback", 
oldSatelliteFallback).detail("NewSatelliteFallback", newSatelliteFallback); return true; } @@ -945,24 +979,25 @@ public: Optional> clusterControllerProcessId; Optional> clusterControllerDcId; AsyncVar>>> desiredDcIds; //desired DC priorities - AsyncVar>>> changingDcIds; //current DC priorities for everyone other than the cluster controller process - AsyncVar>>> changedDcIds; //current DC priority for the cluster controller process + AsyncVar>>>> changingDcIds; //current DC priorities to change first, and whether that is the cluster controller + AsyncVar>>>> changedDcIds; //current DC priorities to change second, and whether the cluster controller has been changed UID id; std::vector outstandingRecruitmentRequests; std::vector outstandingRemoteRecruitmentRequests; std::vector> outstandingStorageRequests; ActorCollection ac; UpdateWorkerList updateWorkerList; - Future betterMasterExistsChecker; + Future outstandingRequestChecker; DBInfo db; Database cx; double startTime; + Optional remoteStartTime; Version datacenterVersionDifference; bool versionDifferenceUpdated; explicit ClusterControllerData( ClusterControllerFullInterface ccInterface ) - : id(ccInterface.id()), ac(false), betterMasterExistsChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false) + : id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false) { auto serverInfo = db.serverInfo->get(); serverInfo.id = g_random->randomUniqueID(); @@ -1214,31 +1249,34 @@ void checkOutstandingStorageRequests( ClusterControllerData* self ) { } } -ACTOR Future doCheckOutstandingMasterRequests( ClusterControllerData* self ) { - Void _ = wait( delay(SERVER_KNOBS->CHECK_BETTER_MASTER_INTERVAL) ); - if (self->betterMasterExists()) { - if (!self->db.forceMasterFailure.isSet()) { - self->db.forceMasterFailure.send( Void() ); - TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().master.id()); +ACTOR Future doCheckOutstandingRequests( ClusterControllerData* self ) { + try { + Void _ = wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) ); + + checkOutstandingRecruitmentRequests( self ); + checkOutstandingRemoteRecruitmentRequests( self ); + checkOutstandingStorageRequests( self ); + + self->checkRecoveryStalled(); + if (self->betterMasterExists()) { + if (!self->db.forceMasterFailure.isSet()) { + self->db.forceMasterFailure.send( Void() ); + TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().master.id()); + } + } + } catch( Error &e ) { + if(e.code() != error_code_operation_failed && e.code() != error_code_no_more_servers) { + TraceEvent(SevError, "CheckOutstandingError").error(e); } } return Void(); } -void checkOutstandingMasterRequests( ClusterControllerData* self ) { - self->checkRecoveryStalled(); - - if( !self->betterMasterExistsChecker.isReady() ) +void checkOutstandingRequests( ClusterControllerData* self ) { + if( !self->outstandingRequestChecker.isReady() ) return; - self->betterMasterExistsChecker = doCheckOutstandingMasterRequests(self); -} - -void checkOutstandingRequests( ClusterControllerData* self ) { - checkOutstandingRecruitmentRequests( self ); - checkOutstandingRemoteRecruitmentRequests( self ); - checkOutstandingStorageRequests( self ); - checkOutstandingMasterRequests( self ); + 
self->outstandingRequestChecker = doCheckOutstandingRequests(self); } ACTOR Future rebootAndCheck( ClusterControllerData* cluster, Optional> processID ) { @@ -1252,7 +1290,7 @@ ACTOR Future rebootAndCheck( ClusterControllerData* cluster, Optionalid_worker.end()) { watcher->second.reboots--; if( watcher->second.reboots < 2 ) - checkOutstandingMasterRequests( cluster ); + checkOutstandingRequests( cluster ); } return Void(); @@ -1481,7 +1519,7 @@ ACTOR Future clusterRecruitRemoteFromConfiguration( ClusterControllerData* req.reply.send( self->findRemoteWorkersForConfiguration( req ) ); return Void(); } catch (Error& e) { - if (e.code() == error_code_no_more_servers && now() - self->startTime >= SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) { + if (e.code() == error_code_no_more_servers && self->remoteStartTime.present() && now() - self->remoteStartTime.get() >= SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) { self->outstandingRemoteRecruitmentRequests.push_back( req ); TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e); return Void(); @@ -1575,7 +1613,7 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c self->db.serverInfo->set( dbInfo ); } - checkOutstandingMasterRequests(self); + checkOutstandingRequests(self); } void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) { @@ -1589,11 +1627,21 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) { if ( w.address() == g_network->getLocalAddress() ) { self->clusterControllerProcessId = w.locality.processId(); self->clusterControllerDcId = w.locality.dcId(); - if(self->changedDcIds.get().present()) { - newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changedDcIds.get().get() ); + if(self->changingDcIds.get().first) { + if(self->changingDcIds.get().second.present()) { + newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changingDcIds.get().second.get() ); + } + } else if(self->changedDcIds.get().second.present()) { + newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changedDcIds.get().second.get() ); + } + } else { + if(!self->changingDcIds.get().first) { + if(self->changingDcIds.get().second.present()) { + newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changingDcIds.get().second.get() ); + } + } else if(self->changedDcIds.get().second.present()) { + newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changedDcIds.get().second.get() ); } - } else if(self->changingDcIds.get().present()) { - newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness( w.locality.dcId(), self->changingDcIds.get().get() ); } // Check process class and exclusive property @@ -1900,38 +1948,91 @@ ACTOR Future updatedChangingDatacenters(ClusterControllerData *self) { //do not change the cluster controller until all the processes have had a chance to register Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); loop { - self->changingDcIds.set(self->desiredDcIds.get()); - if(self->changingDcIds.get().present()) { - for ( auto& it : self->id_worker ) { - uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( it.second.interf.locality.dcId(), self->changingDcIds.get().get() ); - if ( it.first != self->clusterControllerProcessId && 
it.second.priorityInfo.dcFitness != fitness ) { - it.second.priorityInfo.dcFitness = fitness; - if(!it.second.reply.isSet()) { - it.second.reply.send( RegisterWorkerReply( it.second.processClass, it.second.priorityInfo ) ); + state Future onChange = self->desiredDcIds.onChange(); + if(!self->desiredDcIds.get().present()) { + self->changingDcIds.set(std::make_pair(false,self->desiredDcIds.get())); + } else { + auto& worker = self->id_worker[self->clusterControllerProcessId]; + uint8_t newFitness = ClusterControllerPriorityInfo::calculateDCFitness( worker.interf.locality.dcId(), self->desiredDcIds.get().get() ); + self->changingDcIds.set(std::make_pair(worker.priorityInfo.dcFitness > newFitness,self->desiredDcIds.get())); + + if ( worker.priorityInfo.dcFitness > newFitness ) { + worker.priorityInfo.dcFitness = newFitness; + if(!worker.reply.isSet()) { + worker.reply.send( RegisterWorkerReply( worker.processClass, worker.priorityInfo ) ); + } + } else { + state int currentFit = ProcessClass::BestFit; + while(currentFit <= ProcessClass::NeverAssign) { + bool updated = false; + for ( auto& it : self->id_worker ) { + if( ( !it.second.priorityInfo.isExcluded && it.second.priorityInfo.processClassFitness == currentFit ) || currentFit == ProcessClass::NeverAssign ) { + uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( it.second.interf.locality.dcId(), self->changingDcIds.get().second.get() ); + if ( it.first != self->clusterControllerProcessId && it.second.priorityInfo.dcFitness != fitness ) { + updated = true; + it.second.priorityInfo.dcFitness = fitness; + if(!it.second.reply.isSet()) { + it.second.reply.send( RegisterWorkerReply( it.second.processClass, it.second.priorityInfo ) ); + } + } + } } + if(updated && currentFit < ProcessClass::NeverAssign) { + Void _ = wait( delay(SERVER_KNOBS->CC_CLASS_DELAY) ); + } + currentFit++; } } } - - Void _ = wait(self->desiredDcIds.onChange()); + + Void _ = wait(onChange); } } ACTOR Future updatedChangedDatacenters(ClusterControllerData *self) { state Future changeDelay = delay(SERVER_KNOBS->CC_CHANGE_DELAY); + state Future onChange = self->changingDcIds.onChange(); loop { choose { - when( Void _ = wait(self->changingDcIds.onChange()) ) { changeDelay = delay(SERVER_KNOBS->CC_CHANGE_DELAY); } + when( Void _ = wait(onChange) ) { + changeDelay = delay(SERVER_KNOBS->CC_CHANGE_DELAY); + onChange = self->changingDcIds.onChange(); + } when( Void _ = wait(changeDelay) ) { changeDelay = Never(); - self->changedDcIds.set(self->changingDcIds.get()); - if(self->changedDcIds.get().present()) { - auto& worker = self->id_worker[self->clusterControllerProcessId]; - uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( worker.interf.locality.dcId(), self->changedDcIds.get().get() ); - if ( worker.priorityInfo.dcFitness != fitness ) { - worker.priorityInfo.dcFitness = fitness; - if(!worker.reply.isSet()) { - worker.reply.send( RegisterWorkerReply( worker.processClass, worker.priorityInfo ) ); + onChange = self->changingDcIds.onChange(); + + self->changedDcIds.set(self->changingDcIds.get()); + if(self->changedDcIds.get().second.present()) { + if( !self->changedDcIds.get().first ) { + auto& worker = self->id_worker[self->clusterControllerProcessId]; + uint8_t newFitness = ClusterControllerPriorityInfo::calculateDCFitness( worker.interf.locality.dcId(), self->changedDcIds.get().second.get() ); + if( worker.priorityInfo.dcFitness != newFitness ) { + worker.priorityInfo.dcFitness = newFitness; + if(!worker.reply.isSet()) { + worker.reply.send( 
RegisterWorkerReply( worker.processClass, worker.priorityInfo ) ); + } + } + } else { + state int currentFit = ProcessClass::BestFit; + while(currentFit <= ProcessClass::NeverAssign) { + bool updated = false; + for ( auto& it : self->id_worker ) { + if( ( !it.second.priorityInfo.isExcluded && it.second.priorityInfo.processClassFitness == currentFit ) || currentFit == ProcessClass::NeverAssign ) { + uint8_t fitness = ClusterControllerPriorityInfo::calculateDCFitness( it.second.interf.locality.dcId(), self->changedDcIds.get().second.get() ); + if ( it.first != self->clusterControllerProcessId && it.second.priorityInfo.dcFitness != fitness ) { + updated = true; + it.second.priorityInfo.dcFitness = fitness; + if(!it.second.reply.isSet()) { + it.second.reply.send( RegisterWorkerReply( it.second.processClass, it.second.priorityInfo ) ); + } + } + } + } + if(updated && currentFit < ProcessClass::NeverAssign) { + Void _ = wait( delay(SERVER_KNOBS->CC_CLASS_DELAY) ); + } + currentFit++; } } } @@ -1941,6 +2042,7 @@ ACTOR Future updatedChangedDatacenters(ClusterControllerData *self) { } ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *self ) { + state double lastLogTime = 0; loop { self->versionDifferenceUpdated = false; if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.usableRegions == 1) { @@ -1977,12 +2079,12 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel Void _ = wait(self->db.serverInfo->onChange()); continue; } - + state Future onChange = self->db.serverInfo->onChange(); loop { state Future primaryMetrics = primaryLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() ); state Future remoteMetrics = remoteLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() ); - + Void _ = wait( ( success(primaryMetrics) && success(remoteMetrics) ) || onChange ); if(onChange.isReady()) { break; @@ -1990,6 +2092,10 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel self->versionDifferenceUpdated = true; self->datacenterVersionDifference = primaryMetrics.get().v - remoteMetrics.get().v; + if(now() - lastLogTime > SERVER_KNOBS->CLUSTER_CONTROLLER_LOGGING_DELAY) { + lastLogTime = now(); + TraceEvent("DatacenterVersionDifference", self->id).detail("Difference", self->datacenterVersionDifference); + } Void _ = wait( delay(SERVER_KNOBS->VERSION_LAG_METRIC_INTERVAL) || onChange ); if(onChange.isReady()) { diff --git a/fdbserver/ClusterRecruitmentInterface.h b/fdbserver/ClusterRecruitmentInterface.h index e07de7e444..599919224a 100644 --- a/fdbserver/ClusterRecruitmentInterface.h +++ b/fdbserver/ClusterRecruitmentInterface.h @@ -89,10 +89,13 @@ struct RecruitFromConfigurationReply { vector storageServers; vector oldLogRouters; Optional dcId; + bool satelliteFallback; + + RecruitFromConfigurationReply() : satelliteFallback(false) {} template void serialize( Ar& ar ) { - ar & tLogs & satelliteTLogs & proxies & resolvers & storageServers & oldLogRouters & dcId; + ar & tLogs & satelliteTLogs & proxies & resolvers & storageServers & oldLogRouters & dcId & satelliteFallback; } }; diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 6fe6b60cee..bcae4ed210 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -404,7 +404,7 @@ ACTOR Future> getInitialDataDistribution( Dat // for each range for(int i = 0; i < keyServers.size() - 1; i++) { - ShardInfo info( keyServers[i].key ); + DDShardInfo info( 
keyServers[i].key ); decodeKeyServersValue( keyServers[i].value, src, dest ); if(remoteDcIds.size()) { auto srcIter = team_cache.find(src); @@ -471,7 +471,7 @@ ACTOR Future> getInitialDataDistribution( Dat } // a dummy shard at the end with no keys or servers makes life easier for trackInitialShards() - result->shards.push_back( ShardInfo(allKeys.end) ); + result->shards.push_back( DDShardInfo(allKeys.end) ); return result; } diff --git a/fdbserver/DataDistribution.h b/fdbserver/DataDistribution.h index 677b4830b4..76446cf818 100644 --- a/fdbserver/DataDistribution.h +++ b/fdbserver/DataDistribution.h @@ -175,7 +175,8 @@ private: void insert(Team team, KeyRange const& range); }; -struct ShardInfo { +// DDShardInfo is so named to avoid link-time name collision with ShardInfo within the StorageServer +struct DDShardInfo { Key key; vector primarySrc; vector remoteSrc; @@ -183,7 +184,7 @@ struct ShardInfo { vector remoteDest; bool hasDest; - explicit ShardInfo(Key key) : key(key), hasDest(false) {} + explicit DDShardInfo(Key key) : key(key), hasDest(false) {} }; struct InitialDataDistribution : ReferenceCounted { @@ -191,7 +192,7 @@ struct InitialDataDistribution : ReferenceCounted { vector> allServers; std::set> primaryTeams; std::set> remoteTeams; - vector shards; + vector shards; }; Future dataDistribution( diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 8307469b4a..4b7b1c3cd1 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -248,7 +248,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR, 10.0 ); // Master Server - init( MASTER_LOGGING_DELAY, 1.0 ); // masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution) // by delay()ing for this amount of time between accepted batches of TransactionRequests. 
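
The Knobs.cpp hunk below follows the pattern used throughout that file: each knob is given a production default via init(), and a simulation run may then overwrite it with an extreme value when randomize && BUGGIFY is set (for example WAIT_FOR_GOOD_RECRUITMENT_DELAY and CHECK_OUTSTANDING_INTERVAL further down). The following is a minimal standalone sketch of that pattern only; the knob names and the plain bool standing in for BUGGIFY are illustrative, not the real ServerKnobs machinery.

#include <iostream>

// Illustrative stand-ins for the real knob / fault-injection infrastructure.
static const bool randomize = true;   // whether this run randomizes knobs
static const bool buggify = true;     // stand-in for the BUGGIFY switch

struct ExampleKnobs {
    double LOGGING_DELAY;
    double RECRUITMENT_DELAY;

    ExampleKnobs() {
        // Each knob gets a sane production default; simulation runs may
        // replace it with an extreme value to stress timing assumptions.
        init(LOGGING_DELAY, 5.0);
        init(RECRUITMENT_DELAY, 1.0);
        if (randomize && buggify) RECRUITMENT_DELAY = 0.001;
    }

private:
    static void init(double& knob, double value) { knob = value; }
};

int main() {
    ExampleKnobs knobs;
    std::cout << "LOGGING_DELAY=" << knobs.LOGGING_DELAY
              << " RECRUITMENT_DELAY=" << knobs.RECRUITMENT_DELAY << "\n";
}
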
init( COMMIT_SLEEP_TIME, 0.0001 ); if( randomize && BUGGIFY ) COMMIT_SLEEP_TIME = 0; @@ -267,6 +266,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( LAST_LIMITED_RATIO, 0.6 ); //Cluster Controller + init( CLUSTER_CONTROLLER_LOGGING_DELAY, 5.0 ); init( MASTER_FAILURE_REACTION_TIME, 0.4 ); if( randomize && BUGGIFY ) MASTER_FAILURE_REACTION_TIME = 10.0; init( MASTER_FAILURE_SLOPE_DURING_RECOVERY, 0.1 ); init( WORKER_COORDINATION_PING_DELAY, 60 ); @@ -274,19 +274,21 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( SHUTDOWN_TIMEOUT, 600 ); if( randomize && BUGGIFY ) SHUTDOWN_TIMEOUT = 60.0; init( MASTER_SPIN_DELAY, 1.0 ); if( randomize && BUGGIFY ) MASTER_SPIN_DELAY = 10.0; init( CC_CHANGE_DELAY, 0.1 ); - init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 0.1 ); + init( CC_CLASS_DELAY, 0.01 ); + init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 1.0 ); + init( WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY, 5.0 ); init( ATTEMPT_RECRUITMENT_DELAY, 0.035 ); init( WORKER_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) WORKER_FAILURE_TIME = 10.0; - init( CHECK_BETTER_MASTER_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) CHECK_BETTER_MASTER_INTERVAL = 0.001; + init( CHECK_OUTSTANDING_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) CHECK_OUTSTANDING_INTERVAL = 0.001; init( VERSION_LAG_METRIC_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) VERSION_LAG_METRIC_INTERVAL = 10.0; init( MAX_VERSION_DIFFERENCE, 20 * VERSIONS_PER_SECOND ); init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0; - init( EXPECTED_MASTER_FITNESS, ProcessClass::GoodFit ); - init( EXPECTED_TLOG_FITNESS, ProcessClass::GoodFit ); - init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::GoodFit ); - init( EXPECTED_PROXY_FITNESS, ProcessClass::GoodFit ); - init( EXPECTED_RESOLVER_FITNESS, ProcessClass::GoodFit ); + init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_TLOG_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_PROXY_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_RESOLVER_FITNESS, ProcessClass::UnsetFit ); init( RECRUITMENT_TIMEOUT, 600 ); if( randomize && BUGGIFY ) RECRUITMENT_TIMEOUT = g_random->coinflip() ? 
60.0 : 1.0; init( POLICY_RATING_TESTS, 200 ); if( randomize && BUGGIFY ) POLICY_RATING_TESTS = 20; @@ -343,7 +345,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { //Storage Server init( STORAGE_LOGGING_DELAY, 5.0 ); init( STORAGE_SERVER_POLL_METRICS_DELAY, 1.0 ); - init( FUTURE_VERSION_DELAY, 1.0 ); if( randomize && BUGGIFY ) FUTURE_VERSION_DELAY = 0.001; + init( FUTURE_VERSION_DELAY, 1.0 ); init( STORAGE_LIMIT_BYTES, 500000 ); init( BUGGIFY_LIMIT_BYTES, 1000 ); init( FETCH_BLOCK_BYTES, 2e6 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 676ac9ea3b..b956d740f9 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -192,7 +192,6 @@ public: double PROXY_SPIN_DELAY; // Master Server - double MASTER_LOGGING_DELAY; double COMMIT_SLEEP_TIME; double MIN_BALANCE_TIME; int64_t MIN_BALANCE_DIFFERENCE; @@ -208,6 +207,7 @@ public: int64_t RESOLVER_STATE_MEMORY_LIMIT; //Cluster Controller + double CLUSTER_CONTROLLER_LOGGING_DELAY; double MASTER_FAILURE_REACTION_TIME; double MASTER_FAILURE_SLOPE_DURING_RECOVERY; int WORKER_COORDINATION_PING_DELAY; @@ -215,10 +215,12 @@ public: double SHUTDOWN_TIMEOUT; double MASTER_SPIN_DELAY; double CC_CHANGE_DELAY; + double CC_CLASS_DELAY; double WAIT_FOR_GOOD_RECRUITMENT_DELAY; + double WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY; double ATTEMPT_RECRUITMENT_DELAY; double WORKER_FAILURE_TIME; - double CHECK_BETTER_MASTER_INTERVAL; + double CHECK_OUTSTANDING_INTERVAL; double INCOMPATIBLE_PEERS_LOGGING_INTERVAL; double VERSION_LAG_METRIC_INTERVAL; int64_t MAX_VERSION_DIFFERENCE; diff --git a/fdbserver/LeaderElection.actor.cpp b/fdbserver/LeaderElection.actor.cpp index b7b9a78e41..174d4819aa 100644 --- a/fdbserver/LeaderElection.actor.cpp +++ b/fdbserver/LeaderElection.actor.cpp @@ -82,7 +82,10 @@ ACTOR Future tryBecomeLeaderInternal( ServerCoordinators coordinators, Val state bool iAmLeader = false; state UID prevChangeID; - if( asyncPriorityInfo->get().processClassFitness > ProcessClass::UnsetFit || asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessBad || asyncPriorityInfo->get().isExcluded ) { + + if(asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessBad || asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessRemote || asyncPriorityInfo->get().isExcluded) { + Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) ); + } else if( asyncPriorityInfo->get().processClassFitness > ProcessClass::UnsetFit ) { Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); } diff --git a/fdbserver/LogRouter.actor.cpp b/fdbserver/LogRouter.actor.cpp index 4a81554d1b..a0f76fda5d 100644 --- a/fdbserver/LogRouter.actor.cpp +++ b/fdbserver/LogRouter.actor.cpp @@ -37,17 +37,17 @@ struct LogRouterData { struct TagData : NonCopyable, public ReferenceCounted { std::deque> version_messages; Version popped; - Version knownCommittedVersion; + Version durableKnownCommittedVersion; Tag tag; - TagData( Tag tag, Version popped, Version knownCommittedVersion ) : tag(tag), popped(popped), knownCommittedVersion(knownCommittedVersion) {} + TagData( Tag tag, Version popped, Version durableKnownCommittedVersion ) : tag(tag), popped(popped), durableKnownCommittedVersion(durableKnownCommittedVersion) {} - TagData(TagData&& r) noexcept(true) : version_messages(std::move(r.version_messages)), tag(r.tag), popped(r.popped), knownCommittedVersion(r.knownCommittedVersion) {} + TagData(TagData&& r) noexcept(true) : version_messages(std::move(r.version_messages)), 
tag(r.tag), popped(r.popped), durableKnownCommittedVersion(r.durableKnownCommittedVersion) {} void operator= (TagData&& r) noexcept(true) { version_messages = std::move(r.version_messages); tag = r.tag; popped = r.popped; - knownCommittedVersion = r.knownCommittedVersion; + durableKnownCommittedVersion = r.durableKnownCommittedVersion; } // Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before) @@ -79,6 +79,7 @@ struct LogRouterData { NotifiedVersion version; NotifiedVersion minPopped; Version startVersion; + Version minKnownCommittedVersion; Deque>>> messageBlocks; Tag routerTag; bool allowPops; @@ -101,7 +102,7 @@ struct LogRouterData { return newTagData; } - LogRouterData(UID dbgid, InitializeLogRouterRequest req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar>()), version(req.startVersion-1), minPopped(req.startVersion-1), startVersion(req.startVersion), allowPops(false) { + LogRouterData(UID dbgid, InitializeLogRouterRequest req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar>()), version(req.startVersion-1), minPopped(req.startVersion-1), startVersion(req.startVersion), allowPops(false), minKnownCommittedVersion(0) { //setup just enough of a logSet to be able to call getPushLocations logSet.logServers.resize(req.tLogLocalities.size()); logSet.tLogPolicy = req.tLogPolicy; @@ -162,7 +163,7 @@ void commitMessages( LogRouterData* self, Version version, const std::vectormessageBlocks.push_back( std::make_pair(version, block) ); @@ -193,6 +194,8 @@ ACTOR Future pullAsyncData( LogRouterData *self ) { } } + self->minKnownCommittedVersion = std::max(self->minKnownCommittedVersion, r->getMinKnownCommittedVersion()); + state Version ver = 0; state std::vector messages; while (true) { @@ -306,6 +309,7 @@ ACTOR Future logRouterPeekMessages( LogRouterData* self, TLogPeekRequest r TLogPeekReply reply; reply.maxKnownVersion = self->version.get(); + reply.minKnownCommittedVersion = self->minKnownCommittedVersion; reply.messages = messages.toStringRef(); reply.popped = self->minPopped.get() >= self->startVersion ? self->minPopped.get() : 0; reply.end = endVersion; @@ -318,10 +322,10 @@ ACTOR Future logRouterPeekMessages( LogRouterData* self, TLogPeekRequest r ACTOR Future logRouterPop( LogRouterData* self, TLogPopRequest req ) { auto tagData = self->getTagData(req.tag); if (!tagData) { - tagData = self->createTagData(req.tag, req.to, req.knownCommittedVersion); + tagData = self->createTagData(req.tag, req.to, req.durableKnownCommittedVersion); } else if (req.to > tagData->popped) { tagData->popped = req.to; - tagData->knownCommittedVersion = req.knownCommittedVersion; + tagData->durableKnownCommittedVersion = req.durableKnownCommittedVersion; Void _ = wait(tagData->eraseMessagesBefore( req.to, self, TaskTLogPop )); } @@ -330,7 +334,7 @@ ACTOR Future logRouterPop( LogRouterData* self, TLogPopRequest req ) { for( auto it : self->tag_data ) { if(it) { minPopped = std::min( it->popped, minPopped ); - minKnownCommittedVersion = std::min( it->knownCommittedVersion, minKnownCommittedVersion ); + minKnownCommittedVersion = std::min( it->durableKnownCommittedVersion, minKnownCommittedVersion ); } } @@ -340,10 +344,7 @@ ACTOR Future logRouterPop( LogRouterData* self, TLogPopRequest req ) { } if(self->logSystem->get() && self->allowPops) { - //The knownCommittedVersion might not be committed on the primary logs, so subtracting max_read_transaction_life_versions will ensure it is committed. 
- //We then need to subtract max_read_transaction_life_versions again ensure we do not pop below the knownCommittedVersion of the primary logs. - //FIXME: if we get the true knownCommittedVersion when peeking from the primary logs we only need to subtract max_read_transaction_life_versions once. - self->logSystem->get()->pop(minKnownCommittedVersion - 2*SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, self->routerTag); + self->logSystem->get()->pop(std::min(minKnownCommittedVersion, self->minKnownCommittedVersion), self->routerTag); } req.reply.send(Void()); self->minPopped.set(std::max(minPopped, self->minPopped.get())); diff --git a/fdbserver/LogSystem.h b/fdbserver/LogSystem.h index 373a2ea82b..89a7cb5a43 100644 --- a/fdbserver/LogSystem.h +++ b/fdbserver/LogSystem.h @@ -76,7 +76,8 @@ public: void populateSatelliteTagLocations(int logRouterTags, int oldLogRouterTags) { satelliteTagLocations.clear(); satelliteTagLocations.resize(std::max(logRouterTags,oldLogRouterTags) + 1); - + + std::map server_usedBest; std::set> used_servers; for(int i = 0; i < tLogLocalities.size(); i++) { used_servers.insert(std::make_pair(0,i)); @@ -109,6 +110,16 @@ public: for(auto& entry : resultEntries) { resultPairs.push_back(*serverMap->getObject(entry)); } + int firstBestUsed = server_usedBest[resultPairs[0].second]; + for(int i = 1; i < resultPairs.size(); i++) { + int thisBestUsed = server_usedBest[resultPairs[i].second]; + if(thisBestUsed < firstBestUsed) { + std::swap(resultPairs[0], resultPairs[i]); + firstBestUsed = thisBestUsed; + } + } + server_usedBest[resultPairs[0].second]++; + for(auto& res : resultPairs) { satelliteTagLocations[team].push_back(res.second); used_servers.erase(res); @@ -126,20 +137,31 @@ public: } void checkSatelliteTagLocations() { + std::vector usedBest; std::vector used; + usedBest.resize(tLogLocalities.size()); used.resize(tLogLocalities.size()); for(auto team : satelliteTagLocations) { + usedBest[team[0]]++; for(auto loc : team) { used[loc]++; } } + + int minUsedBest = satelliteTagLocations.size(); + int maxUsedBest = 0; + for(auto i : usedBest) { + minUsedBest = std::min(minUsedBest, i); + maxUsedBest = std::max(maxUsedBest, i); + } + int minUsed = satelliteTagLocations.size(); int maxUsed = 0; for(auto i : used) { minUsed = std::min(minUsed, i); maxUsed = std::max(maxUsed, i); } - TraceEvent(maxUsed - minUsed > 1 ? (g_network->isSimulated() ? SevError : SevWarnAlways) : SevInfo, "CheckSatelliteTagLocations").detail("MinUsed", minUsed).detail("MaxUsed", maxUsed); + TraceEvent(((maxUsed - minUsed > 1) || (maxUsedBest - minUsedBest > 1)) ? (g_network->isSimulated() ? SevError : SevWarnAlways) : SevInfo, "CheckSatelliteTagLocations").detail("MinUsed", minUsed).detail("MaxUsed", maxUsed).detail("MinUsedBest", minUsedBest).detail("MaxUsedBest", maxUsedBest); } int bestLocationFor( Tag tag ) { @@ -314,6 +336,8 @@ struct ILogSystem { // Returns the maximum version known to have been pushed (not necessarily durably) into the log system (0 is always a possible result!) 
virtual Version getMaxKnownVersion() { return 0; } + virtual Version getMinKnownCommittedVersion() = 0; + virtual void addref() = 0; virtual void delref() = 0; @@ -358,6 +382,7 @@ struct ILogSystem { virtual bool isExhausted(); virtual LogMessageVersion version(); virtual Version popped(); + virtual Version getMinKnownCommittedVersion(); virtual void addref() { ReferenceCounted::addref(); @@ -411,6 +436,7 @@ struct ILogSystem { virtual bool isExhausted(); virtual LogMessageVersion version(); virtual Version popped(); + virtual Version getMinKnownCommittedVersion(); virtual void addref() { ReferenceCounted::addref(); @@ -455,6 +481,7 @@ struct ILogSystem { virtual bool isExhausted(); virtual LogMessageVersion version(); virtual Version popped(); + virtual Version getMinKnownCommittedVersion(); virtual void addref() { ReferenceCounted::addref(); @@ -488,6 +515,7 @@ struct ILogSystem { virtual bool isExhausted(); virtual LogMessageVersion version(); virtual Version popped(); + virtual Version getMinKnownCommittedVersion(); virtual void addref() { ReferenceCounted::addref(); @@ -516,7 +544,7 @@ struct ILogSystem { // Never returns normally, but throws an error if the subsystem stops working //Future push( UID bundle, int64_t seq, VectorRef messages ); - virtual Future push( Version prevVersion, Version version, Version knownCommittedVersion, struct LogPushData& data, Optional debugID = Optional() ) = 0; + virtual Future push( Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, struct LogPushData& data, Optional debugID = Optional() ) = 0; // Waits for the version number of the bundle (in this epoch) to be prevVersion (i.e. for all pushes ordered earlier) // Puts the given messages into the bundle, each with the given tags, and with message versions (version, 0) - (version, N) // Changes the version number of the bundle to be version (unblocking the next push) @@ -535,7 +563,7 @@ struct ILogSystem { // Same contract as peek(), but blocks until the preferred log server(s) for the given tag are available (and is correspondingly less expensive) virtual Reference peekLogRouter( UID dbgid, Version begin, Tag tag ) = 0; - // Same contract as peek(), but can only peek from the logs elected in the same generation. + // Same contract as peek(), but can only peek from the logs elected in the same generation. // If the preferred log server is down, a different log from the same generation will merge results locally before sending them to the log router. 
virtual void pop( Version upTo, Tag tag, Version knownCommittedVersion = 0, int8_t popLocality = tagLocalityInvalid ) = 0; @@ -548,8 +576,8 @@ struct ILogSystem { virtual Future endEpoch() = 0; // Ends the current epoch without starting a new one - static Reference fromServerDBInfo( UID const& dbgid, struct ServerDBInfo const& db, bool usePreviousEpochEnd = false, Optional>> addActor = Optional>>() ); - static Reference fromLogSystemConfig( UID const& dbgid, struct LocalityData const&, struct LogSystemConfig const&, bool excludeRemote = false, bool usePreviousEpochEnd = false, Optional>> addActor = Optional>>() ); + static Reference fromServerDBInfo( UID const& dbgid, struct ServerDBInfo const& db, bool useRecoveredAt = false, Optional>> addActor = Optional>>() ); + static Reference fromLogSystemConfig( UID const& dbgid, struct LocalityData const&, struct LogSystemConfig const&, bool excludeRemote = false, bool useRecoveredAt = false, Optional>> addActor = Optional>>() ); // Constructs a new ILogSystem implementation from the given ServerDBInfo/LogSystemConfig. Might return a null reference if there isn't a fully recovered log system available. // The caller can peek() the returned log system and can push() if it has version numbers reserved for it and prevVersions diff --git a/fdbserver/LogSystemConfig.h b/fdbserver/LogSystemConfig.h index 9a5b863751..7ef5b6e34a 100644 --- a/fdbserver/LogSystemConfig.h +++ b/fdbserver/LogSystemConfig.h @@ -156,11 +156,11 @@ struct LogSystemConfig { int32_t expectedLogSets; UID recruitmentID; bool stopped; - Optional previousEpochEndVersion; + Optional recoveredAt; LogSystemConfig() : logSystemType(0), logRouterTags(0), expectedLogSets(0), stopped(false) {} - std::string toString() const { + std::string toString() const { return format("type: %d oldGenerations: %d tags: %d %s", logSystemType, oldTLogs.size(), logRouterTags, describe(tLogs).c_str()); } @@ -217,7 +217,7 @@ struct LogSystemConfig { bool operator == ( const LogSystemConfig& rhs ) const { return isEqual(rhs); } bool isEqual(LogSystemConfig const& r) const { - return logSystemType == r.logSystemType && tLogs == r.tLogs && oldTLogs == r.oldTLogs && expectedLogSets == r.expectedLogSets && logRouterTags == r.logRouterTags && recruitmentID == r.recruitmentID && stopped == r.stopped && previousEpochEndVersion == r.previousEpochEndVersion; + return logSystemType == r.logSystemType && tLogs == r.tLogs && oldTLogs == r.oldTLogs && expectedLogSets == r.expectedLogSets && logRouterTags == r.logRouterTags && recruitmentID == r.recruitmentID && stopped == r.stopped && recoveredAt == r.recoveredAt; } bool isEqualIds(LogSystemConfig const& r) const { @@ -248,7 +248,7 @@ struct LogSystemConfig { template void serialize( Ar& ar ) { - ar & logSystemType & tLogs & logRouterTags & oldTLogs & expectedLogSets & recruitmentID & stopped & previousEpochEndVersion; + ar & logSystemType & tLogs & logRouterTags & oldTLogs & expectedLogSets & recruitmentID & stopped & recoveredAt; } }; diff --git a/fdbserver/LogSystemPeekCursor.actor.cpp b/fdbserver/LogSystemPeekCursor.actor.cpp index eab21c6783..8fb40ecf46 100644 --- a/fdbserver/LogSystemPeekCursor.actor.cpp +++ b/fdbserver/LogSystemPeekCursor.actor.cpp @@ -26,6 +26,7 @@ ILogSystem::ServerPeekCursor::ServerPeekCursor( Reference>> const& interf, Tag tag, Version begin, Version end, bool returnIfBlocked, bool parallelGetMore ) : interf(interf), tag(tag), messageVersion(begin), end(end), hasMsg(false), rd(results.arena, results.messages, Unversioned()), 
randomID(g_random->randomUniqueID()), poppedVersion(0), returnIfBlocked(returnIfBlocked), sequence(0), parallelGetMore(parallelGetMore) { this->results.maxKnownVersion = 0; + this->results.minKnownCommittedVersion = 0; //TraceEvent("SPC_Starting", randomID).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).backtrace(); } @@ -34,6 +35,7 @@ ILogSystem::ServerPeekCursor::ServerPeekCursor( TLogPeekReply const& results, Lo { //TraceEvent("SPC_Clone", randomID); this->results.maxKnownVersion = 0; + this->results.minKnownCommittedVersion = 0; if(hasMsg) nextMessage(); @@ -141,6 +143,7 @@ ACTOR Future serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self } loop { + state Version expectedBegin = self->messageVersion.version; try { while(self->futureResults.size() < SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS && self->interf->get().present()) { self->futureResults.push_back( brokenPromiseToNever( self->interf->get().interf().peekMessages.getReply(TLogPeekRequest(self->messageVersion.version,self->tag,self->returnIfBlocked, std::make_pair(self->randomID, self->sequence++)), taskID) ) ); @@ -148,6 +151,10 @@ ACTOR Future serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self choose { when( TLogPeekReply res = wait( self->interf->get().present() ? self->futureResults.front() : Never() ) ) { + if(res.begin.get() != expectedBegin) { + throw timed_out(); + } + expectedBegin = res.end; self->futureResults.pop_front(); self->results = res; if(res.popped.present()) @@ -254,6 +261,8 @@ bool ILogSystem::ServerPeekCursor::isExhausted() { LogMessageVersion ILogSystem::ServerPeekCursor::version() { return messageVersion; } // Call only after nextMessage(). The sequence of the current message, or results.end if nextMessage() has returned false. 
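
In serverPeekParallelGetMore above, the cursor keeps several peek requests in flight and now records the version each reply is expected to begin at; a reply whose begin differs from expectedBegin is treated like a timeout so the request pipeline is torn down and rebuilt instead of silently skipping versions. The following is a small synchronous sketch of that expected-begin check, with the futures and networking replaced by a stubbed reply queue; all names are illustrative.

#include <deque>
#include <iostream>
#include <stdexcept>

using Version = long long;

struct PeekReply {
    Version begin;  // first version covered by this reply
    Version end;    // version the next request should start from
};

// Stand-in for a queue of already-issued, pipelined peek replies.
std::deque<PeekReply> pendingReplies = {{100, 110}, {110, 125}, {130, 140}};

// Consume pipelined replies, verifying each one starts exactly where the
// previous one ended; a gap means a reply was lost or reordered, so the
// caller should discard the pipeline and re-issue its requests.
Version drainReplies(Version expectedBegin) {
    while (!pendingReplies.empty()) {
        PeekReply res = pendingReplies.front();
        pendingReplies.pop_front();
        if (res.begin != expectedBegin)
            throw std::runtime_error("out-of-order peek reply; restart pipeline");
        expectedBegin = res.end;
    }
    return expectedBegin;
}

int main() {
    try {
        std::cout << "reached version " << drainReplies(100) << "\n";
    } catch (const std::exception& e) {
        std::cout << "error: " << e.what() << "\n";  // triggered by the 125 -> 130 gap
    }
}
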
+Version ILogSystem::ServerPeekCursor::getMinKnownCommittedVersion() { return results.minKnownCommittedVersion; } + Version ILogSystem::ServerPeekCursor::popped() { return poppedVersion; } ILogSystem::MergedPeekCursor::MergedPeekCursor( vector< Reference > const& serverCursors, Version begin, bool collectTags ) @@ -485,6 +494,10 @@ bool ILogSystem::MergedPeekCursor::isExhausted() { LogMessageVersion ILogSystem::MergedPeekCursor::version() { return messageVersion; } +Version ILogSystem::MergedPeekCursor::getMinKnownCommittedVersion() { + return serverCursors[currentCursor]->getMinKnownCommittedVersion(); +} + Version ILogSystem::MergedPeekCursor::popped() { Version poppedVersion = 0; for (auto& c : serverCursors) @@ -776,6 +789,10 @@ bool ILogSystem::SetPeekCursor::isExhausted() { LogMessageVersion ILogSystem::SetPeekCursor::version() { return messageVersion; } +Version ILogSystem::SetPeekCursor::getMinKnownCommittedVersion() { + return serverCursors[currentSet][currentCursor]->getMinKnownCommittedVersion(); +} + Version ILogSystem::SetPeekCursor::popped() { Version poppedVersion = 0; for (auto& cursors : serverCursors) { @@ -858,6 +875,10 @@ LogMessageVersion ILogSystem::MultiCursor::version() { return cursors.back()->version(); } +Version ILogSystem::MultiCursor::getMinKnownCommittedVersion() { + return cursors.back()->getMinKnownCommittedVersion(); +} + Version ILogSystem::MultiCursor::popped() { return std::max(poppedVersion, cursors.back()->popped()); } diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 64168b25f7..68c8babd3c 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -175,7 +175,8 @@ struct ProxyCommitData { LogSystemDiskQueueAdapter* logAdapter; Reference logSystem; IKeyValueStore* txnStateStore; - NotifiedVersion committedVersion; // Provided that this recovery has succeeded or will succeed, this version is fully committed (durable) + NotifiedVersion committedVersion; // Provided that this recovery has succeeded or will succeed, this version is fully committed (durable) + Version minKnownCommittedVersion; // No version smaller than this one will be used as the known committed version during recovery Version version; // The version at which txnStateStore is up to date Promise validState; // Set once txnStateStore and version are valid double lastVersionTime; @@ -223,9 +224,9 @@ struct ProxyCommitData { } ProxyCommitData(UID dbgid, MasterInterface master, RequestStream getConsistentReadVersion, Version recoveryTransactionVersion, RequestStream commit, Reference> db, bool firstProxy) - : dbgid(dbgid), stats(dbgid, &version, &committedVersion, &commitBatchesMemBytesCount), master(master), + : dbgid(dbgid), stats(dbgid, &version, &committedVersion, &commitBatchesMemBytesCount), master(master), logAdapter(NULL), txnStateStore(NULL), - committedVersion(recoveryTransactionVersion), version(0), + committedVersion(recoveryTransactionVersion), version(0), minKnownCommittedVersion(0), lastVersionTime(0), commitVersionRequestNumber(1), mostRecentProcessedRequestNumber(0), getConsistentReadVersion(getConsistentReadVersion), commit(commit), lastCoalesceTime(0), localCommitBatchesStarted(0), locked(false), firstProxy(firstProxy), @@ -794,8 +795,7 @@ ACTOR Future commitBatch( if ( prevVersion && commitVersion - prevVersion < SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT/2 ) debug_advanceMaxCommittedVersion(UID(), commitVersion); - Future loggingComplete = self->logSystem->push( prevVersion, commitVersion, 
self->committedVersion.get(), toCommit, debugID ) - || self->committedVersion.whenAtLeast( commitVersion+1 ); + Future loggingComplete = self->logSystem->push( prevVersion, commitVersion, self->committedVersion.get(), self->minKnownCommittedVersion, toCommit, debugID ); if (!forceRecovery) { ASSERT(self->latestLocalCommitBatchLogging.get() == localBatchNumber-1); @@ -803,12 +803,25 @@ ACTOR Future commitBatch( } /////// Phase 4: Logging (network bound; pipelined up to MAX_READ_TRANSACTION_LIFE_VERSIONS (limited by loop above)) - Void _ = wait(loggingComplete); + + try { + choose { + when(Version ver = wait(loggingComplete)) { + self->minKnownCommittedVersion = std::max(self->minKnownCommittedVersion, ver); + } + when(Void _ = wait(self->committedVersion.whenAtLeast( commitVersion+1 ))) {} + } + } catch(Error &e) { + if(e.code() == error_code_broken_promise) { + throw master_tlog_failed(); + } + throw; + } Void _ = wait(yield()); self->logSystem->pop(msg.popTo, txsTag); - /////// Phase 5: Replies (CPU bound; no particular order required, though ordered execution would be best for latency) + /////// Phase 5: Replies (CPU bound; no particular order required, though ordered execution would be best for latency) if ( prevVersion && commitVersion - prevVersion < SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT/2 ) debug_advanceMinCommittedVersion(UID(), commitVersion); @@ -826,7 +839,7 @@ ACTOR Future commitBatch( if( commitVersion > self->committedVersion.get() ) { self->locked = lockedAfter; self->committedVersion.set(commitVersion); - } + } if (forceRecovery) { TraceEvent(SevWarn, "RestartingTxnSubsystem", self->dbgid).detail("Stage", "ProxyShutdown"); @@ -1170,7 +1183,7 @@ ACTOR Future masterProxyServerCore( state Future lastCommitComplete = Void(); state PromiseStream> addActor; - state Future onError = actorCollection(addActor.getFuture()); + state Future onError = transformError( actorCollection(addActor.getFuture()), broken_promise(), master_tlog_failed() ); state double lastCommit = 0; state std::set txnSequences; state Sequence maxSequence = std::numeric_limits::max(); diff --git a/fdbserver/OldTLogServer.actor.cpp b/fdbserver/OldTLogServer.actor.cpp index 2fd7710554..cb19a31118 100644 --- a/fdbserver/OldTLogServer.actor.cpp +++ b/fdbserver/OldTLogServer.actor.cpp @@ -952,6 +952,7 @@ namespace oldTLog { TLogPeekReply reply; reply.maxKnownVersion = logData->version.get(); + reply.minKnownCommittedVersion = 0; if(poppedVer > req.begin) { reply.popped = poppedVer; reply.end = poppedVer; @@ -974,6 +975,7 @@ namespace oldTLog { } else { sequenceData.send(reply.end); } + reply.begin = req.begin; } req.reply.send( reply ); diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index d5e288e6cb..29d1607aaf 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -68,13 +68,13 @@ ACTOR Future getMasterWorker( Database cx, Reference getDataInFlight( Database cx, WorkerInterface masterWorker ) { try { TraceEvent("DataInFlight").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster"); - Standalone md = wait( timeoutError(masterWorker.eventLogRequest.getReply( + TraceEventFields md = wait( timeoutError(masterWorker.eventLogRequest.getReply( EventLogRequest( StringRef( cx->dbName.toString() + "/TotalDataInFlight" ) ) ), 1.0 ) ); int64_t dataInFlight; - sscanf(extractAttribute(md.toString(), "TotalBytes").c_str(), "%lld", &dataInFlight); + sscanf(md.getValue("TotalBytes").c_str(), "%lld", &dataInFlight); return dataInFlight; } 
catch( Error &e ) { - TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataInFlight"); + TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataInFlight").error(e); throw; } @@ -89,13 +89,13 @@ ACTOR Future getDataInFlight( Database cx, Reference md ) { +int64_t getQueueSize( TraceEventFields md ) { double inputRate, durableRate; double inputRoughness, durableRoughness; int64_t inputBytes, durableBytes; - sscanf(extractAttribute(md.toString(), "BytesInput").c_str(), "%lf %lf %lld", &inputRate, &inputRoughness, &inputBytes); - sscanf(extractAttribute(md.toString(), "BytesDurable").c_str(), "%lf %lf %lld", &durableRate, &durableRoughness, &durableBytes); + sscanf(md.getValue("BytesInput").c_str(), "%lf %lf %lld", &inputRate, &inputRoughness, &inputBytes); + sscanf(md.getValue("BytesDurable").c_str(), "%lf %lf %lld", &durableRate, &durableRoughness, &durableBytes); return inputBytes - durableBytes; } @@ -110,7 +110,7 @@ ACTOR Future getMaxTLogQueueSize( Database cx, Reference>> messages; + state std::vector> messages; state std::vector tlogs = dbInfo->get().logSystemConfig.allPresentLogs(); for(int i = 0; i < tlogs.size(); i++) { auto itr = workersMap.find(tlogs[i].address()); @@ -182,7 +182,7 @@ ACTOR Future getMaxStorageServerQueueSize( Database cx, Reference>> messages; + state std::vector> messages; for(int i = 0; i < servers.size(); i++) { auto itr = workersMap.find(servers[i].address()); if(itr == workersMap.end()) { @@ -224,17 +224,17 @@ ACTOR Future getDataDistributionQueueSize( Database cx, WorkerInterface try { TraceEvent("DataDistributionQueueSize").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster"); - Standalone movingDataMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( + TraceEventFields movingDataMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( EventLogRequest( StringRef( cx->dbName.toString() + "/MovingData") ) ), 1.0 ) ); - TraceEvent("DataDistributionQueueSize").detail("Database", printable(cx->dbName)).detail("Stage", "GotString").detail("Result", printable(movingDataMessage)).detail("TrackLatest", printable( StringRef( cx->dbName.toString() + "/MovingData") ) ); + TraceEvent("DataDistributionQueueSize").detail("Database", printable(cx->dbName)).detail("Stage", "GotString")/*.detail("Result", printable(movingDataMessage))*/.detail("TrackLatest", printable( StringRef( cx->dbName.toString() + "/MovingData") ) ); int64_t inQueue; - sscanf(extractAttribute(movingDataMessage.toString(), "InQueue").c_str(), "%lld", &inQueue); + sscanf(movingDataMessage.getValue("InQueue").c_str(), "%lld", &inQueue); if(reportInFlight) { int64_t inFlight; - sscanf(extractAttribute(movingDataMessage.toString(), "InFlight").c_str(), "%lld", &inFlight); + sscanf(movingDataMessage.getValue("InFlight").c_str(), "%lld", &inFlight); inQueue += inFlight; } @@ -258,10 +258,10 @@ ACTOR Future getDataDistributionActive( Database cx, WorkerInterface maste try { TraceEvent("DataDistributionActive").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster"); - Standalone activeMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( + TraceEventFields activeMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( EventLogRequest( StringRef( cx->dbName.toString() + "/DDTrackerStarting") ) ), 1.0 ) ); - return extractAttribute(activeMessage.toString(), "State") == "Active"; + return activeMessage.getValue("State") == "Active"; } 
catch( Error &e ) { TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract DataDistributionActive"); throw; @@ -273,10 +273,10 @@ ACTOR Future getStorageServersRecruiting( Database cx, ReferencedbName)).detail("Stage", "ContactingMaster"); - Standalone recruitingMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( + TraceEventFields recruitingMessage = wait( timeoutError(masterWorker.eventLogRequest.getReply( EventLogRequest( StringRef( cx->dbName.toString() + "/StorageServerRecruitment_" + dbInfo->get().master.id().toString()) ) ), 1.0 ) ); - return extractAttribute(recruitingMessage.toString(), "State") == "Recruiting"; + return recruitingMessage.getValue("State") == "Recruiting"; } catch( Error &e ) { TraceEvent("QuietDatabaseFailure", masterWorker.id()).detail("Reason", "Failed to extract StorageServersRecruiting").detail("MasterID", dbInfo->get().master.id()); throw; diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 747eb4cd7a..7ffebfc50d 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -119,6 +119,7 @@ T simulate( const T& in ) { static void simInitTLS(Reference tlsOptions) { tlsOptions->set_cert_data( certBytes ); tlsOptions->set_key_data( certBytes ); + tlsOptions->set_verify_peers(std::vector(1, "Check.Valid=0")); tlsOptions->register_network(); } @@ -864,6 +865,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) { ASSERT(false); // Programmer forgot to adjust cases. } + if (g_random->random01() < 0.25) db.desiredLogRouterCount = g_random->randomInt(1,7); if (g_random->random01() < 0.25) db.remoteDesiredTLogCount = g_random->randomInt(1,7); } @@ -935,25 +937,16 @@ void setupSimulatedSystem( vector> *systemActors, std::string baseF g_simulator.remoteTLogPolicy = simconfig.db.getRemoteTLogPolicy(); g_simulator.usableRegions = simconfig.db.usableRegions; - if(simconfig.db.regions.size() == 2) { - g_simulator.primaryDcId = simconfig.db.regions[0].dcId; - g_simulator.remoteDcId = simconfig.db.regions[1].dcId; - g_simulator.hasSatelliteReplication = simconfig.db.regions[0].satelliteTLogReplicationFactor > 0; - ASSERT((!simconfig.db.regions[0].satelliteTLogPolicy && !simconfig.db.regions[1].satelliteTLogPolicy) || simconfig.db.regions[0].satelliteTLogPolicy->info() == simconfig.db.regions[1].satelliteTLogPolicy->info()); - g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicy; - g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorum; - - for(auto s : simconfig.db.regions[0].satellites) { - g_simulator.primarySatelliteDcIds.push_back(s.dcId); - } - for(auto s : simconfig.db.regions[1].satellites) { - g_simulator.remoteSatelliteDcIds.push_back(s.dcId); - } - } else if(simconfig.db.regions.size() == 1) { + if(simconfig.db.regions.size() > 0) { g_simulator.primaryDcId = simconfig.db.regions[0].dcId; g_simulator.hasSatelliteReplication = simconfig.db.regions[0].satelliteTLogReplicationFactor > 0; - g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicy; - g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorum; + if(simconfig.db.regions[0].satelliteTLogUsableDcsFallback > 0) { + g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicyFallback; + g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorumFallback; + } else { + 
g_simulator.satelliteTLogPolicy = simconfig.db.regions[0].satelliteTLogPolicy; + g_simulator.satelliteTLogWriteAntiQuorum = simconfig.db.regions[0].satelliteTLogWriteAntiQuorum; + } for(auto s : simconfig.db.regions[0].satellites) { g_simulator.primarySatelliteDcIds.push_back(s.dcId); @@ -962,7 +955,16 @@ void setupSimulatedSystem( vector> *systemActors, std::string baseF g_simulator.hasSatelliteReplication = false; g_simulator.satelliteTLogWriteAntiQuorum = 0; } - + + if(simconfig.db.regions.size() == 2) { + g_simulator.remoteDcId = simconfig.db.regions[1].dcId; + ASSERT((!simconfig.db.regions[0].satelliteTLogPolicy && !simconfig.db.regions[1].satelliteTLogPolicy) || simconfig.db.regions[0].satelliteTLogPolicy->info() == simconfig.db.regions[1].satelliteTLogPolicy->info()); + + for(auto s : simconfig.db.regions[1].satellites) { + g_simulator.remoteSatelliteDcIds.push_back(s.dcId); + } + } + ASSERT(g_simulator.storagePolicy && g_simulator.tLogPolicy); ASSERT(!g_simulator.hasSatelliteReplication || g_simulator.satelliteTLogPolicy); TraceEvent("SimulatorConfig").detail("ConfigString", printable(StringRef(startingConfigString))); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index a223fb7f3d..680cc4e095 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -75,137 +75,28 @@ extern int limitReasonEnd; extern const char* limitReasonName[]; extern const char* limitReasonDesc[]; -// Returns -1 if it fails to find a quoted string at the start of xml; returns the position beyond the close quote -// If decoded is not NULL, writes the decoded attribute value there -int decodeQuotedAttributeValue( StringRef xml, std::string* decoded ) { - if (decoded) decoded->clear(); - if (!xml.size() || xml[0] != '"') return -1; - int pos = 1; +struct WorkerEvents : std::map {}; - loop { - if (pos == xml.size()) return -1; // No closing quote - if (xml[pos]=='"') { pos++; break; } // Success - - uint8_t out = xml[pos]; - if (xml[pos] == '&') { - if (xml.substr(pos).startsWith(LiteralStringRef("&"))) { out = '&'; pos += 5; } - else if (xml.substr(pos).startsWith(LiteralStringRef("<"))) { out = '<'; pos += 4; } - else if (xml.substr(pos).startsWith(LiteralStringRef("""))) { out = '"'; pos += 6; } - else return -1; - } else - pos++; - if (decoded) decoded->push_back(out); - } - - return pos; -} - -// return false on failure; outputs decoded attribute value to `ret` -bool tryExtractAttribute( StringRef expanded, StringRef attributeToExtract, std::string& ret ) { - // This is only expected to parse the XML that Trace.cpp actually generates; we haven't looked at the standard to even find out what it doesn't try to do - - int pos = 0; - // Consume '<' - if (pos == expanded.size() || expanded[pos] != '<') return false; - pos++; - // Consume tag name - while (pos != expanded.size() && expanded[pos] != ' ' && expanded[pos] != '/' && expanded[pos] != '>') pos++; - - while (pos != expanded.size() && expanded[pos] != '>' && expanded[pos] != '/') { - // Consume whitespace - while (pos != expanded.size() && expanded[pos] == ' ') pos++; - - // We should be looking at an attribute or the end of the string; find '=' at the end of the attribute, if any - int eq_or_end = pos; - while (eq_or_end != expanded.size() && expanded[eq_or_end]!='=' && expanded[eq_or_end]!='>') eq_or_end++; - - if ( expanded.substr(pos, eq_or_end-pos) == attributeToExtract ) { - // Found the attribute we want; decode the value - int end = decodeQuotedAttributeValue(expanded.substr(eq_or_end+1), &ret); - if (end<0) { 
ret.clear(); return false; } - return true; - } - - // We don't want this attribute, but we need to skip over its value - // It looks like this *could* just be a scan for '"' characters - int end = decodeQuotedAttributeValue(expanded.substr(eq_or_end+1), NULL); - if (end<0) return false; - pos = (eq_or_end+1)+end; - } - return false; -} - -// Throws attribute_not_found if the key is not found -std::string extractAttribute( StringRef expanded, StringRef attributeToExtract ) { - std::string ret; - if (!tryExtractAttribute(expanded, attributeToExtract, ret)) - throw attribute_not_found(); - return ret; -} -std::string extractAttribute( std::string const& expanded, std::string const& attributeToExtract ) { - return extractAttribute(StringRef(expanded), StringRef(attributeToExtract)); -} - -TEST_CASE("fdbserver/Status/extractAttribute/basic") { - std::string a; - - ASSERT( tryExtractAttribute( - LiteralStringRef(""), - LiteralStringRef("A"), - a) && a == LiteralStringRef("\"a\"")); - - ASSERT( tryExtractAttribute( - LiteralStringRef(""), - LiteralStringRef("B"), - a) && a == LiteralStringRef("\\") ); - - ASSERT( tryExtractAttribute( - LiteralStringRef(""), - LiteralStringRef("Version"), - a) && a == LiteralStringRef("3.0.0-PRERELEASE") ); - - ASSERT( !tryExtractAttribute( - LiteralStringRef(""), - LiteralStringRef("ersion"), - a) ); - - return Void(); -} - -TEST_CASE("fdbserver/Status/extractAttribute/fuzz") { - // This is just looking for anything that crashes or infinite loops - std::string out; - for(int i=0; i<100000; i++) - { - std::string s = ""; - s[ g_random->randomInt(0, s.size()) ] = g_random->randomChoice(LiteralStringRef("\" =q0\\&")); - tryExtractAttribute(s, LiteralStringRef("Version"), out); - } - return Void(); -} - -struct WorkerEvents : std::map {}; - -ACTOR static Future< Optional > latestEventOnWorker(WorkerInterface worker, std::string eventName) { +ACTOR static Future< Optional > latestEventOnWorker(WorkerInterface worker, std::string eventName) { try { EventLogRequest req = eventName.size() > 0 ? EventLogRequest(Standalone(eventName)) : EventLogRequest(); - ErrorOr> eventTrace = wait( errorOr(timeoutError(worker.eventLogRequest.getReply(req), 2.0))); + ErrorOr eventTrace = wait( errorOr(timeoutError(worker.eventLogRequest.getReply(req), 2.0))); if (eventTrace.isError()){ - return Optional(); + return Optional(); } - return eventTrace.get().toString(); + return eventTrace.get(); } catch (Error &e){ if (e.code() == error_code_actor_cancelled) throw; - return Optional(); + return Optional(); } } ACTOR static Future< Optional< std::pair> > > latestEventOnWorkers(std::vector> workers, std::string eventName) { try { - state vector>>> eventTraces; + state vector>> eventTraces; for (int c = 0; c < workers.size(); c++) { EventLogRequest req = eventName.size() > 0 ? 
EventLogRequest(Standalone(eventName)) : EventLogRequest(); eventTraces.push_back(errorOr(timeoutError(workers[c].first.eventLogRequest.getReply(req), 2.0))); @@ -217,13 +108,13 @@ ACTOR static Future< Optional< std::pair> > WorkerEvents results; for (int i = 0; i < eventTraces.size(); i++) { - ErrorOr> v = eventTraces[i].get(); + const ErrorOr& v = eventTraces[i].get(); if (v.isError()){ failed.insert(workers[i].first.address().toString()); - results[workers[i].first.address()] = ""; + results[workers[i].first.address()] = TraceEventFields(); } else { - results[workers[i].first.address()] = v.get().toString(); + results[workers[i].first.address()] = v.get(); } } @@ -342,21 +233,21 @@ static StatusObject getLocalityInfo(const LocalityData& locality) { return localityObj; } -static StatusObject getError(std::string error) { +static StatusObject getError(const TraceEventFields& errorFields) { StatusObject statusObj; try { - if (error.size()) { - double time = atof(extractAttribute(error, "Time").c_str()); + if (errorFields.size()) { + double time = atof(errorFields.getValue("Time").c_str()); statusObj["time"] = time; - statusObj["raw_log_message"] = error; + statusObj["raw_log_message"] = errorFields.toString(); - std::string type = extractAttribute(error, "Type"); + std::string type = errorFields.getValue("Type"); statusObj["type"] = type; std::string description = type; std::string errorName; - if (tryExtractAttribute(error, LiteralStringRef("Error"), errorName)) { + if(errorFields.tryGetValue("Error", errorName)) { statusObj["name"] = errorName; description += ": " + errorName; } @@ -374,7 +265,7 @@ static StatusObject getError(std::string error) { } } catch (Error &e){ - TraceEvent(SevError, "StatusGetErrorError").error(e).detail("RawError", error); + TraceEvent(SevError, "StatusGetErrorError").error(e).detail("RawError", errorFields.toString()); } return statusObj; } @@ -385,7 +276,7 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector dcIds; std::map locality; for (auto worker : workers){ @@ -401,12 +292,12 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vectorsecond; + const TraceEventFields& event = it->second; try { std::string address = toIPString(it->first.ip); // We will use the "physical" caluculated machine ID here to limit exposure to machineID repurposing - std::string machineId = extractAttribute(event, "MachineID"); + std::string machineId = event.getValue("MachineID"); // If this machine ID does not already exist in the machineMap, add it if (!machineMap.count(machineId)) { @@ -424,23 +315,23 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector 0){ @@ -451,17 +342,17 @@ static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector 0){ retransSegsObj["hz"] = metric / elapsed; @@ -512,50 +403,62 @@ struct RolesInfo { obj["role"] = role; return roles.insert( make_pair(address, obj ))->second; } - StatusObject& addRole(std::string const& role, StorageServerInterface& iface, std::string const& metrics, Version maxTLogVersion) { + StatusObject& addRole(std::string const& role, StorageServerInterface& iface, TraceEventFields const& metrics, Version maxTLogVersion) { StatusObject obj; obj["id"] = iface.id().shortString(); obj["role"] = role; try { - obj["stored_bytes"] = parseInt64(extractAttribute(metrics, "BytesStored")); - obj["kvstore_used_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesUsed")); - obj["kvstore_free_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesFree")); - 
obj["kvstore_available_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesAvailable")); - obj["kvstore_total_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesTotal")); - obj["input_bytes"] = parseCounter(extractAttribute(metrics, "BytesInput")); - obj["durable_bytes"] = parseCounter(extractAttribute(metrics, "BytesDurable")); - obj["query_queue_max"] = parseInt(extractAttribute(metrics, "QueryQueueMax")); - obj["finished_queries"] = parseCounter(extractAttribute(metrics, "FinishedQueries")); + obj["stored_bytes"] = parseInt64(metrics.getValue("BytesStored")); + obj["kvstore_used_bytes"] = parseInt64(metrics.getValue("KvstoreBytesUsed")); + obj["kvstore_free_bytes"] = parseInt64(metrics.getValue("KvstoreBytesFree")); + obj["kvstore_available_bytes"] = parseInt64(metrics.getValue("KvstoreBytesAvailable")); + obj["kvstore_total_bytes"] = parseInt64(metrics.getValue("KvstoreBytesTotal")); + obj["input_bytes"] = parseCounter(metrics.getValue("BytesInput")); + obj["durable_bytes"] = parseCounter(metrics.getValue("BytesDurable")); + obj["query_queue_max"] = parseInt(metrics.getValue("QueryQueueMax")); + obj["finished_queries"] = parseCounter(metrics.getValue("FinishedQueries")); - Version version = parseInt64(extractAttribute(metrics, "Version")); + Version version = parseInt64(metrics.getValue("Version")); obj["data_version"] = version; + int64_t versionLag = parseInt64(metrics.getValue("VersionLag")); if(maxTLogVersion > 0) { - obj["data_version_lag"] = std::max(0, maxTLogVersion - version); + // It's possible that the storage server hasn't talked to the logs recently, in which case it may not be aware of how far behind it is. + // To account for that, we also compute the version difference between each storage server and the tlog with the largest version. + // + // Because this data is only logged periodically, this difference will likely be an overestimate for the lag. We subtract off the logging interval + // in order to make this estimate a bounded underestimate instead. 
+ versionLag = std::max(versionLag, maxTLogVersion - version - SERVER_KNOBS->STORAGE_LOGGING_DELAY * SERVER_KNOBS->VERSIONS_PER_SECOND); } + StatusObject dataLag; + dataLag["versions"] = versionLag; + dataLag["seconds"] = versionLag / (double)SERVER_KNOBS->VERSIONS_PER_SECOND; + + obj["data_lag"] = dataLag; + } catch (Error& e) { if(e.code() != error_code_attribute_not_found) throw e; } return roles.insert( make_pair(iface.address(), obj ))->second; } - StatusObject& addRole(std::string const& role, TLogInterface& iface, std::string const& metrics) { + StatusObject& addRole(std::string const& role, TLogInterface& iface, TraceEventFields const& metrics) { StatusObject obj; obj["id"] = iface.id().shortString(); obj["role"] = role; try { - obj["kvstore_used_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesUsed")); - obj["kvstore_free_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesFree")); - obj["kvstore_available_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesAvailable")); - obj["kvstore_total_bytes"] = parseInt64(extractAttribute(metrics, "KvstoreBytesTotal")); - obj["queue_disk_used_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesUsed")); - obj["queue_disk_free_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesFree")); - obj["queue_disk_available_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesAvailable")); - obj["queue_disk_total_bytes"] = parseInt64(extractAttribute(metrics, "QueueDiskBytesTotal")); - obj["input_bytes"] = parseCounter(extractAttribute(metrics, "BytesInput")); - obj["durable_bytes"] = parseCounter(extractAttribute(metrics, "BytesDurable")); - obj["data_version"] = parseInt64(extractAttribute(metrics, "Version")); + obj["kvstore_used_bytes"] = parseInt64(metrics.getValue("KvstoreBytesUsed")); + obj["kvstore_free_bytes"] = parseInt64(metrics.getValue("KvstoreBytesFree")); + obj["kvstore_available_bytes"] = parseInt64(metrics.getValue("KvstoreBytesAvailable")); + obj["kvstore_total_bytes"] = parseInt64(metrics.getValue("KvstoreBytesTotal")); + obj["queue_disk_used_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesUsed")); + obj["queue_disk_free_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesFree")); + obj["queue_disk_available_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesAvailable")); + obj["queue_disk_total_bytes"] = parseInt64(metrics.getValue("QueueDiskBytesTotal")); + obj["input_bytes"] = parseCounter(metrics.getValue("BytesInput")); + obj["durable_bytes"] = parseCounter(metrics.getValue("BytesDurable")); + obj["data_version"] = parseInt64(metrics.getValue("Version")); } catch (Error& e) { if(e.code() != error_code_attribute_not_found) throw e; @@ -586,8 +489,8 @@ ACTOR static Future processStatusFetcher( WorkerEvents traceFileOpenErrors, WorkerEvents programStarts, std::map processIssues, - vector> storageServers, - vector> tLogs, + vector> storageServers, + vector> tLogs, Database cx, Optional configuration, std::set *incomplete_reasons) { @@ -604,10 +507,10 @@ ACTOR static Future processStatusFetcher( Void _ = wait(yield()); if (traceFileErrorsItr->second.size()){ try { - // Have event string, parse it and turn it into a message object describing the trace file opening error - std::string event = traceFileErrorsItr->second; - std::string fileName = extractAttribute(event, "Filename"); - StatusObject msgObj = makeMessage("file_open_error", format("Could not open file '%s' (%s).", fileName.c_str(), extractAttribute(event, "Error").c_str()).c_str()); + // Have event fields, parse 
it and turn it into a message object describing the trace file opening error + const TraceEventFields& event = traceFileErrorsItr->second; + std::string fileName = event.getValue("Filename"); + StatusObject msgObj = makeMessage("file_open_error", format("Could not open file '%s' (%s).", fileName.c_str(), event.getValue("Error").c_str()).c_str()); msgObj["file_name"] = fileName; // Map the address of the worker to the error message object @@ -626,11 +529,11 @@ ACTOR static Future processStatusFetcher( state std::map>, MachineMemoryInfo>::iterator memInfo = machineMemoryUsage.insert(std::make_pair(workerItr->first.locality.machineId(), MachineMemoryInfo())).first; try { ASSERT(pMetrics.count(workerItr->first.address())); - std::string processMetrics = pMetrics[workerItr->first.address()]; + const TraceEventFields& processMetrics = pMetrics[workerItr->first.address()]; if(memInfo->second.valid()) { if(processMetrics.size() > 0) { - memInfo->second.memoryUsage += parseDouble(extractAttribute(processMetrics, "Memory")); + memInfo->second.memoryUsage += parseDouble(processMetrics.getValue("Memory")); ++memInfo->second.numProcesses; } else @@ -656,7 +559,7 @@ ACTOR static Future processStatusFetcher( } } - state std::vector>::iterator log; + state std::vector>::iterator log; state Version maxTLogVersion = 0; for(log = tLogs.begin(); log != tLogs.end(); ++log) { StatusObject const& roleStatus = roles.addRole( "log", log->first, log->second ); @@ -666,7 +569,7 @@ ACTOR static Future processStatusFetcher( Void _ = wait(yield()); } - state std::vector>::iterator ss; + state std::vector>::iterator ss; state std::map ssLag; for(ss = storageServers.begin(); ss != storageServers.end(); ++ss) { StatusObject const& roleStatus = roles.addRole( "storage", ss->first, ss->second, maxTLogVersion ); @@ -692,45 +595,45 @@ ACTOR static Future processStatusFetcher( processMap[printable(workerItr->first.locality.processId())] = StatusObject(); NetworkAddress address = workerItr->first.address(); - std::string event = pMetrics[workerItr->first.address()]; + const TraceEventFields& event = pMetrics[workerItr->first.address()]; statusObj["address"] = address.toString(); StatusObject memoryObj; if (event.size() > 0) { - std::string zoneID = extractAttribute(event, "ZoneID"); + std::string zoneID = event.getValue("ZoneID"); statusObj["fault_domain"] = zoneID; - std::string MachineID = extractAttribute(event, "MachineID"); + std::string MachineID = event.getValue("MachineID"); statusObj["machine_id"] = MachineID; statusObj["locality"] = getLocalityInfo(workerItr->first.locality); - statusObj["uptime_seconds"] = parseDouble(extractAttribute(event, "UptimeSeconds")); + statusObj["uptime_seconds"] = parseDouble(event.getValue("UptimeSeconds")); - metric = parseDouble(extractAttribute(event, "CPUSeconds")); + metric = parseDouble(event.getValue("CPUSeconds")); double cpu_seconds = metric; // rates are calculated over the last elapsed seconds - metric = parseDouble(extractAttribute(event, "Elapsed")); + metric = parseDouble(event.getValue("Elapsed")); double elapsed = metric; - metric = parseDouble(extractAttribute(event, "DiskIdleSeconds")); + metric = parseDouble(event.getValue("DiskIdleSeconds")); double diskIdleSeconds = metric; - metric = parseDouble(extractAttribute(event, "DiskReads")); + metric = parseDouble(event.getValue("DiskReads")); double diskReads = metric; - metric = parseDouble(extractAttribute(event, "DiskWrites")); + metric = parseDouble(event.getValue("DiskWrites")); double diskWrites = metric; - uint64_t 
diskReadsCount = parseInt64(extractAttribute(event, "DiskReadsCount")); + uint64_t diskReadsCount = parseInt64(event.getValue("DiskReadsCount")); - uint64_t diskWritesCount = parseInt64(extractAttribute(event, "DiskWritesCount")); + uint64_t diskWritesCount = parseInt64(event.getValue("DiskWritesCount")); - metric = parseDouble(extractAttribute(event, "DiskWriteSectors")); + metric = parseDouble(event.getValue("DiskWriteSectors")); double diskWriteSectors = metric; - metric = parseDouble(extractAttribute(event, "DiskReadSectors")); + metric = parseDouble(event.getValue("DiskReadSectors")); double diskReadSectors = metric; StatusObject diskObj; @@ -757,39 +660,39 @@ ACTOR static Future processStatusFetcher( diskObj["writes"] = writesObj; } - diskObj["total_bytes"] = parseInt64(extractAttribute(event, "DiskTotalBytes")); - diskObj["free_bytes"] = parseInt64(extractAttribute(event, "DiskFreeBytes")); + diskObj["total_bytes"] = parseInt64(event.getValue("DiskTotalBytes")); + diskObj["free_bytes"] = parseInt64(event.getValue("DiskFreeBytes")); statusObj["disk"] = diskObj; StatusObject networkObj; - networkObj["current_connections"] = parseInt64(extractAttribute(event, "CurrentConnections")); + networkObj["current_connections"] = parseInt64(event.getValue("CurrentConnections")); StatusObject connections_established; - connections_established["hz"] = parseDouble(extractAttribute(event, "ConnectionsEstablished")); + connections_established["hz"] = parseDouble(event.getValue("ConnectionsEstablished")); networkObj["connections_established"] = connections_established; StatusObject connections_closed; - connections_closed["hz"] = parseDouble(extractAttribute(event, "ConnectionsClosed")); + connections_closed["hz"] = parseDouble(event.getValue("ConnectionsClosed")); networkObj["connections_closed"] = connections_closed; StatusObject connection_errors; - connection_errors["hz"] = parseDouble(extractAttribute(event, "ConnectionErrors")); + connection_errors["hz"] = parseDouble(event.getValue("ConnectionErrors")); networkObj["connection_errors"] = connection_errors; - metric = parseDouble(extractAttribute(event, "MbpsSent")); + metric = parseDouble(event.getValue("MbpsSent")); StatusObject megabits_sent; megabits_sent["hz"] = metric; networkObj["megabits_sent"] = megabits_sent; - metric = parseDouble(extractAttribute(event, "MbpsReceived")); + metric = parseDouble(event.getValue("MbpsReceived")); StatusObject megabits_received; megabits_received["hz"] = metric; networkObj["megabits_received"] = megabits_received; statusObj["network"] = networkObj; - metric = parseDouble(extractAttribute(event, "Memory")); + metric = parseDouble(event.getValue("Memory")); memoryObj["used_bytes"] = metric; - metric = parseDouble(extractAttribute(event, "UnusedAllocatedMemory")); + metric = parseDouble(event.getValue("UnusedAllocatedMemory")); memoryObj["unused_allocated_memory"] = metric; } @@ -797,16 +700,16 @@ ACTOR static Future processStatusFetcher( auto const& psxml = programStarts.at(address); if(psxml.size() > 0) { - int64_t memLimit = parseInt64(extractAttribute(psxml, "MemoryLimit")); + int64_t memLimit = parseInt64(psxml.getValue("MemoryLimit")); memoryObj["limit_bytes"] = memLimit; std::string version; - if (tryExtractAttribute(psxml, LiteralStringRef("Version"), version)) { + if (psxml.tryGetValue("Version", version)) { statusObj["version"] = version; } std::string commandLine; - if (tryExtractAttribute(psxml, LiteralStringRef("CommandLine"), commandLine)) { + if (psxml.tryGetValue("CommandLine", commandLine)) { 
statusObj["command_line"] = commandLine; } } @@ -815,7 +718,7 @@ ACTOR static Future processStatusFetcher( // if this process address is in the machine metrics if (mMetrics.count(address) && mMetrics[address].size()){ double availableMemory; - availableMemory = parseDouble(extractAttribute(mMetrics[address], "AvailableMemory")); + availableMemory = parseDouble(mMetrics[address].getValue("AvailableMemory")); auto machineMemInfo = machineMemoryUsage[workerItr->first.locality.machineId()]; if (machineMemInfo.valid()) { @@ -918,8 +821,8 @@ ACTOR static Future recoveryStateStatusFetcher(std::pair md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0) ); - state int mStatusCode = parseInt( extractAttribute(md, LiteralStringRef("StatusCode")) ); + TraceEventFields md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest( LiteralStringRef("MasterRecoveryState") ) ), 1.0) ); + state int mStatusCode = parseInt( md.getValue("StatusCode") ); if (mStatusCode < 0 || mStatusCode >= RecoveryStatus::END) throw attribute_not_found(); @@ -927,9 +830,9 @@ ACTOR static Future recoveryStateStatusFetcher(std::pair recoveryStateStatusFetcher(std::pair dataStatusFetcher(std::pair>> futures; + std::vector> futures; // TODO: Should this be serial? futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStarting"))), 1.0)); futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStats"))), 1.0)); - std::vector> dataInfo = wait(getAll(futures)); + std::vector dataInfo = wait(getAll(futures)); - Standalone startingStats = dataInfo[0]; - state Standalone dataStats = dataInfo[1]; + TraceEventFields startingStats = dataInfo[0]; + state TraceEventFields dataStats = dataInfo[1]; - if (startingStats.size() && extractAttribute(startingStats, LiteralStringRef("State")) != "Active") { + if (startingStats.size() && startingStats.getValue("State") != "Active") { stateSectionObj["name"] = "initializing"; stateSectionObj["description"] = "(Re)initializing automatic data distribution"; } else { - state Standalone md = wait(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/MovingData"))), 1.0)); + state TraceEventFields md = wait(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/MovingData"))), 1.0)); // If we have a MovingData message, parse it. 
if (md.size()) { - int64_t partitionsInQueue = parseInt64(extractAttribute(md, LiteralStringRef("InQueue"))); - int64_t partitionsInFlight = parseInt64(extractAttribute(md, LiteralStringRef("InFlight"))); - int64_t averagePartitionSize = parseInt64(extractAttribute(md, LiteralStringRef("AverageShardSize"))); - int64_t totalBytesWritten = parseInt64(extractAttribute(md, LiteralStringRef("BytesWritten"))); - int highestPriority = parseInt(extractAttribute(md, LiteralStringRef("HighestPriority"))); + int64_t partitionsInQueue = parseInt64(md.getValue("InQueue")); + int64_t partitionsInFlight = parseInt64(md.getValue("InFlight")); + int64_t averagePartitionSize = parseInt64(md.getValue("AverageShardSize")); + int64_t totalBytesWritten = parseInt64(md.getValue("BytesWritten")); + int highestPriority = parseInt(md.getValue("HighestPriority")); if( averagePartitionSize >= 0 ) { StatusObject moving_data; @@ -1253,9 +1156,9 @@ ACTOR static Future dataStatusFetcher(std::pair -static Future>> getServerMetrics(vector servers, std::unordered_map address_workers, std::string suffix) { - state vector>> futures; +static Future>> getServerMetrics(vector servers, std::unordered_map address_workers, std::string suffix) { + state vector>> futures; for (auto s : servers) { futures.push_back(latestEventOnWorker(address_workers[s.address()], s.id().toString() + suffix)); } Void _ = wait(waitForAll(futures)); - vector> results; + vector> results; for (int i = 0; i < servers.size(); i++) { - results.push_back(std::make_pair(servers[i], futures[i].get().present() ? futures[i].get().get() : "")); + results.push_back(std::make_pair(servers[i], futures[i].get().present() ? futures[i].get().get() : TraceEventFields())); } return results; } -ACTOR static Future>> getStorageServersAndMetrics(Database cx, std::unordered_map address_workers) { +ACTOR static Future>> getStorageServersAndMetrics(Database cx, std::unordered_map address_workers) { vector servers = wait(timeoutError(getStorageServers(cx, true), 5.0)); - vector> results = wait(getServerMetrics(servers, address_workers, "/StorageMetrics")); + vector> results = wait(getServerMetrics(servers, address_workers, "/StorageMetrics")); return results; } -ACTOR static Future>> getTLogsAndMetrics(Reference> db, std::unordered_map address_workers) { +ACTOR static Future>> getTLogsAndMetrics(Reference> db, std::unordered_map address_workers) { vector servers = db->get().logSystemConfig.allPresentLogs(); - vector> results = wait(getServerMetrics(servers, address_workers, "/TLogMetrics")); + vector> results = wait(getServerMetrics(servers, address_workers, "/TLogMetrics")); return results; } @@ -1334,6 +1237,7 @@ static int getExtraTLogEligibleMachines(vector::max(); for(auto& region : configuration.regions) { extraTlogEligibleMachines = std::min( extraTlogEligibleMachines, dcId_machine[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) ) ); + //FIXME: does not take into account fallback satellite policies if(region.satelliteTLogReplicationFactor > 0) { int totalSatelliteEligible = 0; for(auto& sat : region.satellites) { @@ -1346,7 +1250,7 @@ static int getExtraTLogEligibleMachines(vector workloadStatusFetcher(Reference> db, vector> workers, std::pair mWorker, - std::string dbName, StatusObject *qos, StatusObject *data_overlay, std::set *incomplete_reasons, Future>>> storageServerFuture) + std::string dbName, StatusObject *qos, StatusObject *data_overlay, std::set 
*incomplete_reasons, Future>>> storageServerFuture) { state StatusObject statusObj; state StatusObject operationsObj; @@ -1355,7 +1259,7 @@ ACTOR static Future workloadStatusFetcher(Reference>> proxyStatFutures; + vector> proxyStatFutures; std::map> workersMap; for (auto w : workers) { workersMap[w.first.address()] = w; @@ -1367,16 +1271,16 @@ ACTOR static Future workloadStatusFetcher(Reference> proxyStats = wait(getAll(proxyStatFutures)); + vector proxyStats = wait(getAll(proxyStatFutures)); StatusObject mutations=makeCounter(), mutationBytes=makeCounter(), txnConflicts=makeCounter(), txnStartOut=makeCounter(), txnCommitOutSuccess=makeCounter(); for (auto &ps : proxyStats) { - mutations = addCounters( mutations, parseCounter(extractAttribute(ps, LiteralStringRef("Mutations"))) ); - mutationBytes = addCounters( mutationBytes, parseCounter(extractAttribute(ps, LiteralStringRef("MutationBytes"))) ); - txnConflicts = addCounters( txnConflicts, parseCounter(extractAttribute(ps, LiteralStringRef("TxnConflicts"))) ); - txnStartOut = addCounters( txnStartOut, parseCounter(extractAttribute(ps, LiteralStringRef("TxnStartOut"))) ); - txnCommitOutSuccess = addCounters( txnCommitOutSuccess, parseCounter(extractAttribute(ps, LiteralStringRef("TxnCommitOutSuccess"))) ); + mutations = addCounters( mutations, parseCounter(ps.getValue("Mutations")) ); + mutationBytes = addCounters( mutationBytes, parseCounter(ps.getValue("MutationBytes")) ); + txnConflicts = addCounters( txnConflicts, parseCounter(ps.getValue("TxnConflicts")) ); + txnStartOut = addCounters( txnStartOut, parseCounter(ps.getValue("TxnStartOut")) ); + txnCommitOutSuccess = addCounters( txnCommitOutSuccess, parseCounter(ps.getValue("TxnCommitOutSuccess")) ); } operationsObj["writes"] = mutations; @@ -1397,19 +1301,19 @@ ACTOR static Future workloadStatusFetcher(Reference md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest(StringRef(dbName+"/RkUpdate") ) ), 1.0) ); - double tpsLimit = parseDouble(extractAttribute(md, LiteralStringRef("TPSLimit"))); - double transPerSec = parseDouble(extractAttribute(md, LiteralStringRef("ReleasedTPS"))); - int ssCount = parseInt(extractAttribute(md, LiteralStringRef("StorageServers"))); - int tlogCount = parseInt(extractAttribute(md, LiteralStringRef("TLogs"))); - int64_t worstFreeSpaceStorageServer = parseInt64(extractAttribute(md, LiteralStringRef("WorstFreeSpaceStorageServer"))); - int64_t worstFreeSpaceTLog = parseInt64(extractAttribute(md, LiteralStringRef("WorstFreeSpaceTLog"))); - int64_t worstStorageServerQueue = parseInt64(extractAttribute(md, LiteralStringRef("WorstStorageServerQueue"))); - int64_t limitingStorageServerQueue = parseInt64(extractAttribute(md, LiteralStringRef("LimitingStorageServerQueue"))); - int64_t worstTLogQueue = parseInt64(extractAttribute(md, LiteralStringRef("WorstTLogQueue"))); - int64_t totalDiskUsageBytes = parseInt64(extractAttribute(md, LiteralStringRef("TotalDiskUsageBytes"))); - int64_t worstVersionLag = parseInt64(extractAttribute(md, LiteralStringRef("WorstStorageServerVersionLag"))); - int64_t limitingVersionLag = parseInt64(extractAttribute(md, LiteralStringRef("LimitingStorageServerVersionLag"))); + TraceEventFields md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest(StringRef(dbName+"/RkUpdate") ) ), 1.0) ); + double tpsLimit = parseDouble(md.getValue("TPSLimit")); + double transPerSec = parseDouble(md.getValue("ReleasedTPS")); + int ssCount = parseInt(md.getValue("StorageServers")); + int tlogCount = 
parseInt(md.getValue("TLogs")); + int64_t worstFreeSpaceStorageServer = parseInt64(md.getValue("WorstFreeSpaceStorageServer")); + int64_t worstFreeSpaceTLog = parseInt64(md.getValue("WorstFreeSpaceTLog")); + int64_t worstStorageServerQueue = parseInt64(md.getValue("WorstStorageServerQueue")); + int64_t limitingStorageServerQueue = parseInt64(md.getValue("LimitingStorageServerQueue")); + int64_t worstTLogQueue = parseInt64(md.getValue("WorstTLogQueue")); + int64_t totalDiskUsageBytes = parseInt64(md.getValue("TotalDiskUsageBytes")); + int64_t worstVersionLag = parseInt64(md.getValue("WorstStorageServerVersionLag")); + int64_t limitingVersionLag = parseInt64(md.getValue("LimitingStorageServerVersionLag")); (*data_overlay)["total_disk_used_bytes"] = totalDiskUsageBytes; if(ssCount > 0) { @@ -1428,13 +1332,13 @@ ACTOR static Future workloadStatusFetcher(Reference tpsLimit * 0.8) { // If reason is known, set qos.performance_limited_by, otherwise omit if (reason >= 0 && reason < limitReasonEnd) { perfLimit = makeMessage(limitReasonName[reason], limitReasonDesc[reason]); - std::string reason_server_id = extractAttribute(md, LiteralStringRef("ReasonServerID")); + std::string reason_server_id = md.getValue("ReasonServerID"); if (!reason_server_id.empty()) perfLimit["reason_server_id"] = reason_server_id; } @@ -1455,7 +1359,7 @@ ACTOR static Future workloadStatusFetcher(Reference>> storageServers = wait(storageServerFuture); + ErrorOr>> storageServers = wait(storageServerFuture); if(!storageServers.present()) { throw storageServers.getError(); } @@ -1465,9 +1369,9 @@ ACTOR static Future workloadStatusFetcher(Reference clusterGetStatus( } state std::map processIssues = getProcessIssuesAsMessages(workerIssues); - state vector> storageServers; - state vector> tLogs; + state vector> storageServers; + state vector> tLogs; state StatusObject qos; state StatusObject data_overlay; @@ -1863,8 +1767,8 @@ ACTOR Future clusterGetStatus( state std::unordered_map address_workers; for (auto worker : workers) address_workers[worker.first.address()] = worker.first; - state Future>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers)); - state Future>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers)); + state Future>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers)); + state Future>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers)); state int minReplicasRemaining = -1; std::vector> futures2; @@ -1915,7 +1819,7 @@ ACTOR Future clusterGetStatus( } // Need storage servers now for processStatusFetcher() below. 
- ErrorOr>> _storageServers = wait(storageServerFuture); + ErrorOr>> _storageServers = wait(storageServerFuture); if (_storageServers.present()) { storageServers = _storageServers.get(); } @@ -1923,7 +1827,7 @@ ACTOR Future clusterGetStatus( messages.push_back(makeMessage("storage_servers_error", "Timed out trying to retrieve storage servers.")); // ...also tlogs - ErrorOr>> _tLogs = wait(tLogFuture); + ErrorOr>> _tLogs = wait(tLogFuture); if (_tLogs.present()) { tLogs = _tLogs.get(); } diff --git a/fdbserver/Status.h b/fdbserver/Status.h index 8d1e8a3bd3..96c5157098 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -30,7 +30,6 @@ typedef std::map< NetworkAddress, std::pair > ProcessIssuesMap; typedef std::map< NetworkAddress, Standalone> > ClientVersionMap; -std::string extractAttribute( std::string const& expanded, std::string const& attributeToExtract ); Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& traceLogGroupMap, ServerCoordinators const& coordinators, std::vector const& incompatibleConnections, Version const& datacenterVersionDifference ); diff --git a/fdbserver/TLogInterface.h b/fdbserver/TLogInterface.h index e185dc55f3..593527e648 100644 --- a/fdbserver/TLogInterface.h +++ b/fdbserver/TLogInterface.h @@ -127,7 +127,7 @@ struct VerUpdateRef { VerUpdateRef( Arena& to, const VerUpdateRef& from ) : version(from.version), mutations( to, from.mutations ), isPrivateData( from.isPrivateData ) {} int expectedSize() const { return mutations.expectedSize(); } - template + template void serialize( Ar& ar ) { ar & version & mutations & isPrivateData; } @@ -139,10 +139,12 @@ struct TLogPeekReply { Version end; Optional popped; Version maxKnownVersion; + Version minKnownCommittedVersion; + Optional begin; template void serialize(Ar& ar) { - ar & arena & messages & end & popped & maxKnownVersion; + ar & arena & messages & end & popped & maxKnownVersion & minKnownCommittedVersion & begin; } }; @@ -166,16 +168,16 @@ struct TLogPeekRequest { struct TLogPopRequest { Arena arena; Version to; - Version knownCommittedVersion; + Version durableKnownCommittedVersion; Tag tag; ReplyPromise reply; - TLogPopRequest( Version to, Version knownCommittedVersion, Tag tag ) : to(to), knownCommittedVersion(knownCommittedVersion), tag(tag) {} + TLogPopRequest( Version to, Version durableKnownCommittedVersion, Tag tag ) : to(to), durableKnownCommittedVersion(durableKnownCommittedVersion), tag(tag) {} TLogPopRequest() {} template void serialize(Ar& ar) { - ar & arena & to & knownCommittedVersion & tag & reply; + ar & arena & to & durableKnownCommittedVersion & tag & reply; } }; @@ -198,19 +200,19 @@ struct TagMessagesRef { struct TLogCommitRequest { Arena arena; - Version prevVersion, version, knownCommittedVersion; + Version prevVersion, version, knownCommittedVersion, minKnownCommittedVersion; StringRef messages;// Each message prefixed by a 4-byte length - ReplyPromise reply; + ReplyPromise reply; Optional debugID; TLogCommitRequest() {} - TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version knownCommittedVersion, StringRef messages, Optional debugID ) - : arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), messages(messages), debugID(debugID) {} - template + TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version 
knownCommittedVersion, Version minKnownCommittedVersion, StringRef messages, Optional debugID ) + : arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), minKnownCommittedVersion(minKnownCommittedVersion), messages(messages), debugID(debugID) {} + template void serialize( Ar& ar ) { - ar & prevVersion & version & knownCommittedVersion & messages & reply & arena & debugID; + ar & prevVersion & version & knownCommittedVersion & minKnownCommittedVersion & messages & reply & arena & debugID; } }; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 56e4d307bf..7247d2195f 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -360,7 +360,7 @@ struct LogData : NonCopyable, public ReferenceCounted { VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData NotifiedVersion version, queueCommittedVersion; Version queueCommittingVersion; - Version knownCommittedVersion, durableKnownCommittedVersion; + Version knownCommittedVersion, durableKnownCommittedVersion, minKnownCommittedVersion; Deque>>> messageBlocks; std::vector>> tag_data; //tag.locality | tag.id @@ -409,7 +409,7 @@ struct LogData : NonCopyable, public ReferenceCounted { explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID) : tLogData(tLogData), knownCommittedVersion(1), logId(interf.id()), cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID), - logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), + logSystem(new AsyncVar>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), // These are initialized differently on init() or recovery recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), logRouterPopToVersion(0), locality(tagLocalityInvalid) @@ -990,6 +990,7 @@ ACTOR Future tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere if(poppedVer > req.begin) { TLogPeekReply rep; rep.maxKnownVersion = logData->version.get(); + rep.minKnownCommittedVersion = logData->minKnownCommittedVersion; rep.popped = poppedVer; rep.end = poppedVer; @@ -1006,6 +1007,7 @@ ACTOR Future tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere } else { sequenceData.send(rep.end); } + rep.begin = req.begin; } req.reply.send( rep ); @@ -1048,6 +1050,7 @@ ACTOR Future tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere TLogPeekReply reply; reply.maxKnownVersion = logData->version.get(); + reply.minKnownCommittedVersion = logData->minKnownCommittedVersion; reply.messages = messages.toStringRef(); reply.end = endVersion; @@ -1066,6 +1069,7 @@ ACTOR Future tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere } else { sequenceData.send(reply.end); } + reply.begin = req.begin; } req.reply.send( reply ); @@ -1166,7 +1170,7 @@ ACTOR Future tLogCommit( g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.BeforeWaitForVersion"); } - logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion); + logData->minKnownCommittedVersion = 
std::max(logData->minKnownCommittedVersion, req.minKnownCommittedVersion); Void _ = wait( logData->version.whenAtLeast( req.prevVersion ) ); @@ -1199,10 +1203,12 @@ ACTOR Future tLogCommit( TraceEvent("TLogCommit", logData->logId).detail("Version", req.version); commitMessages(logData, req.version, req.arena, req.messages, self->bytesInput); + logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion); + // Log the changes to the persistent queue, to be committed by commitQueue() TLogQueueEntryRef qe; qe.version = req.version; - qe.knownCommittedVersion = req.knownCommittedVersion; + qe.knownCommittedVersion = logData->knownCommittedVersion; qe.messages = req.messages; qe.id = logData->logId; self->persistentQueue->push( qe, logData ); @@ -1232,7 +1238,7 @@ ACTOR Future tLogCommit( if(req.debugID.present()) g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After"); - req.reply.send( Void() ); + req.reply.send( logData->durableKnownCommittedVersion ); return Void(); } @@ -1452,7 +1458,7 @@ void removeLog( TLogData* self, Reference logData ) { } } -ACTOR Future pullAsyncData( TLogData* self, Reference logData, std::vector tags, Version beginVersion, Optional endVersion, bool poppedIsKnownCommitted ) { +ACTOR Future pullAsyncData( TLogData* self, Reference logData, std::vector tags, Version beginVersion, Optional endVersion, bool poppedIsKnownCommitted, bool parallelGetMore ) { state Future dbInfoChange = Void(); state Reference r; state Version tagAt = beginVersion; @@ -1462,14 +1468,11 @@ ACTOR Future pullAsyncData( TLogData* self, Reference logData, st loop { choose { when(Void _ = wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) { - if(poppedIsKnownCommitted) { - logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped()); - } break; } when( Void _ = wait( dbInfoChange ) ) { if( logData->logSystem->get() ) { - r = logData->logSystem->get()->peek( logData->logId, tagAt, tags ); + r = logData->logSystem->get()->peek( logData->logId, tagAt, tags, parallelGetMore ); } else { r = Reference(); } @@ -1504,6 +1507,11 @@ ACTOR Future pullAsyncData( TLogData* self, Reference logData, st if(endVersion.present() && ver > endVersion.get()) { return Void(); } + + if(poppedIsKnownCommitted) { + logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped()); + } + commitMessages(logData, ver, messages, self->bytesInput); // Log the changes to the persistent queue, to be committed by commitQueue() @@ -1534,6 +1542,11 @@ ACTOR Future pullAsyncData( TLogData* self, Reference logData, st if(endVersion.present() && ver > endVersion.get()) { return Void(); } + + if(poppedIsKnownCommitted) { + logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped()); + } + // Log the changes to the persistent queue, to be committed by commitQueue() TLogQueueEntryRef qe; qe.version = ver; @@ -1591,7 +1604,7 @@ ACTOR Future tLogCore( TLogData* self, Reference logData, TLogInt if(!logData->isPrimary) { std::vector tags; tags.push_back(logData->remoteTag); - logData->addActor.send( pullAsyncData(self, logData, tags, logData->unrecoveredBefore, Optional(), true) ); + logData->addActor.send( pullAsyncData(self, logData, tags, logData->unrecoveredBefore, Optional(), true, false) ); } try { @@ -1949,10 +1962,10 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit logData->logRouterPopToVersion = req.recoverAt; std::vector tags; 
tags.push_back(logData->remoteTag); - Void _ = wait(pullAsyncData(self, logData, tags, logData->unrecoveredBefore, req.recoverAt, true) || logData->removed); + Void _ = wait(pullAsyncData(self, logData, tags, logData->unrecoveredBefore, req.recoverAt, true, false) || logData->removed); } else if(!req.recoverTags.empty()) { ASSERT(logData->unrecoveredBefore > req.knownCommittedVersion); - Void _ = wait(pullAsyncData(self, logData, req.recoverTags, req.knownCommittedVersion + 1, req.recoverAt, false) || logData->removed); + Void _ = wait(pullAsyncData(self, logData, req.recoverTags, req.knownCommittedVersion + 1, req.recoverAt, false, true) || logData->removed); } } diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 90dc787f2d..fcc94f79f9 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -30,17 +30,15 @@ #include "fdbrpc/ReplicationUtils.h" #include "RecoveryState.h" -ACTOR static Future reportTLogCommitErrors( Future commitReply, UID debugID ) { - try { - Void _ = wait(commitReply); - return Void(); - } catch (Error& e) { - if (e.code() == error_code_broken_promise) - throw master_tlog_failed(); - else if (e.code() != error_code_actor_cancelled && e.code() != error_code_tlog_stopped) - TraceEvent(SevError, "MasterTLogCommitRequestError", debugID).error(e); - throw; +ACTOR Future minVersionWhenReady( Future f, std::vector> replies) { + Void _ = wait(f); + Version minVersion = std::numeric_limits::max(); + for(auto& reply : replies) { + if(reply.isReady() && !reply.isError()) { + minVersion = std::min(minVersion, reply.get()); + } } + return minVersion; } struct OldLogData { @@ -79,8 +77,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted epochEndVersion; - Optional previousEpochEndVersion; + Optional recoverAt; + Optional recoveredAt; Version knownCommittedVersion; LocalityData locality; std::map< std::pair, std::pair > outstandingPops; // For each currently running popFromLog actor, (log server #, tag)->popped version @@ -122,7 +120,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted fromLogSystemConfig( UID const& dbgid, LocalityData const& locality, LogSystemConfig const& lsConf, bool excludeRemote, bool usePreviousEpochEnd, Optional>> addActor ) { + static Reference fromLogSystemConfig( UID const& dbgid, LocalityData const& locality, LogSystemConfig const& lsConf, bool excludeRemote, bool useRecoveredAt, Optional>> addActor ) { ASSERT( lsConf.logSystemType == 2 || (lsConf.logSystemType == 0 && !lsConf.tLogs.size()) ); //ASSERT(lsConf.epoch == epoch); //< FIXME Reference logSystem( new TagPartitionedLogSystem(dbgid, locality, addActor) ); @@ -132,8 +130,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogRouterTags = lsConf.logRouterTags; logSystem->recruitmentID = lsConf.recruitmentID; logSystem->stopped = lsConf.stopped; - if(usePreviousEpochEnd) { - logSystem->previousEpochEndVersion = lsConf.previousEpochEndVersion; + if(useRecoveredAt) { + logSystem->recoveredAt = lsConf.recoveredAt; } for( int i = 0; i < lsConf.tLogs.size(); i++ ) { TLogSet const& tLogSet = lsConf.tLogs[i]; @@ -391,27 +389,26 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted push( Version prevVersion, Version version, Version knownCommittedVersion, LogPushData& data, Optional debugID ) { + virtual Future push( Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, LogPushData& data, Optional 
debugID ) { // FIXME: Randomize request order as in LegacyLogSystem? vector> quorumResults; + vector> allReplies; int location = 0; for(auto& it : tLogs) { if(it->isLocal && it->logServers.size()) { vector> tLogCommitResults; for(int loc=0; loc< it->logServers.size(); loc++) { - Future commitMessage = reportTLogCommitErrors( - it->logServers[loc]->get().interf().commit.getReply( - TLogCommitRequest( data.getArena(), prevVersion, version, knownCommittedVersion, data.getMessages(location), debugID ), TaskTLogCommitReply ), - getDebugID()); - addActor.get().send(commitMessage); - tLogCommitResults.push_back(commitMessage); + allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( data.getArena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, data.getMessages(location), debugID ), TaskTLogCommitReply ) ); + Future commitSuccess = success(allReplies.back()); + addActor.get().send(commitSuccess); + tLogCommitResults.push_back(commitSuccess); location++; } quorumResults.push_back( quorum( tLogCommitResults, tLogCommitResults.size() - it->tLogWriteAntiQuorum ) ); } } - return waitForAll(quorumResults); + return minVersionWhenReady( waitForAll(quorumResults), allReplies); } Reference peekAll( UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore, bool throwIfDead ) { @@ -498,7 +495,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted peekRemote( UID dbgid, Version begin, Tag tag, bool parallelGetMore ) { int bestSet = -1; - Version lastBegin = previousEpochEndVersion.present() ? previousEpochEndVersion.get() + 1 : 0; + Version lastBegin = recoveredAt.present() ? recoveredAt.get() + 1 : 0; for(int t = 0; t < tLogs.size(); t++) { if(tLogs[t]->isLocal) { lastBegin = std::max(lastBegin, tLogs[t]->startVersion); @@ -757,23 +754,23 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, firstOld && previousEpochEndVersion.present() ? previousEpochEndVersion.get() + 1 : old.epochEnd, true ) ); + return Reference( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, firstOld && recoveredAt.present() ? recoveredAt.get() + 1 : old.epochEnd, true ) ); } firstOld = false; } return Reference( new ILogSystem::ServerPeekCursor( Reference>>(), tag, begin, getPeekEnd(), false, false ) ); } - void popLogRouter( Version upTo, Tag tag, Version knownCommittedVersion, int8_t popLocality ) { //FIXME: do not need to pop all generations of old logs + void popLogRouter( Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality ) { //FIXME: do not need to pop all generations of old logs if (!upTo) return; for(auto& t : tLogs) { if(t->locality == popLocality) { for(auto& log : t->logRouters) { Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first; if (prev < upTo) - outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion); + outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, durableKnownCommittedVersion); if (prev == 0) { popActors.add( popFromLog( this, log, tag, 0.0 ) ); //Fast pop time because log routers can only hold 5 seconds of data. 
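Because TLogCommitRequest now replies with a Version (each tlog sends back its durableKnownCommittedVersion), push() collects the per-tlog replies, waits for the usual quorum minus anti-quorum per log set, and then reports the smallest version among whichever replies happen to be ready. A self-contained sketch of that reduction, with plain optionals standing in for flow futures (names here are illustrative only):

    #include <algorithm>
    #include <cstdint>
    #include <limits>
    #include <optional>
    #include <vector>

    using Version = int64_t;

    // Sketch of minVersionWhenReady's reduction step: after the quorum of
    // commit acknowledgements is satisfied, take the minimum over the replies
    // that completed successfully; pending or failed replies are skipped.
    Version minReadyVersion(const std::vector<std::optional<Version>>& replies) {
        Version minVersion = std::numeric_limits<Version>::max();
        for (const auto& r : replies)
            if (r)                                   // ready and not an error
                minVersion = std::min(minVersion, *r);
        return minVersion;                           // max() if none were ready
    }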
} @@ -787,7 +784,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogRouters) { Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first; if (prev < upTo) - outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion); + outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, durableKnownCommittedVersion); if (prev == 0) popActors.add( popFromLog( this, log, tag, 0.0 ) ); } @@ -796,10 +793,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogServers) { Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first; if (prev < upTo) - outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion); + outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, durableKnownCommittedVersion); if (prev == 0) popActors.add( popFromLog( this, log, tag, 1.0 ) ); //< FIXME: knob } @@ -931,7 +928,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted logSet = tLogs[i]; if(logSet->isLocal || remoteLogsWrittenToCoreState) { @@ -1034,12 +1031,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted::max(); @@ -1169,7 +1166,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted logSystem( new TagPartitionedLogSystem(dbgid, locality) ); logSystem->logSystemType = prevState.logSystemType; - logSystem->epochEndVersion = 0; + logSystem->recoverAt = 0; logSystem->knownCommittedVersion = 0; logSystem->stopped = true; outLogSystem->set(logSystem); @@ -1308,7 +1305,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogSystemType = prevState.logSystemType; logSystem->rejoins = rejoins; logSystem->lockResults = lockResults; - logSystem->epochEndVersion = minEnd; + logSystem->recoverAt = minEnd; logSystem->knownCommittedVersion = knownCommittedVersion; logSystem->remoteLogsWrittenToCoreState = true; logSystem->stopped = true; @@ -1552,7 +1549,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedrecruitmentID; req.storeType = configuration.tLogDataStoreType; req.recoverFrom = oldLogSystem->getLogSystemConfig(); - req.recoverAt = oldLogSystem->epochEndVersion.get(); + req.recoverAt = oldLogSystem->recoverAt.get(); req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; req.epoch = recoveryCount; req.remoteTag = Tag(tagLocalityRemoteLog, i); @@ -1598,12 +1595,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted logSystem( new TagPartitionedLogSystem(oldLogSystem->getDebugID(), oldLogSystem->locality) ); logSystem->logSystemType = 2; logSystem->expectedLogSets = 1; - logSystem->previousEpochEndVersion = oldLogSystem->epochEndVersion; + logSystem->recoveredAt = oldLogSystem->recoverAt; logSystem->recruitmentID = g_random->randomUniqueID(); oldLogSystem->recruitmentID = logSystem->recruitmentID; if(configuration.usableRegions > 1) { - logSystem->logRouterTags = recr.tLogs.size(); + logSystem->logRouterTags = recr.tLogs.size() * std::max(1, configuration.desiredLogRouterCount / std::max(1,recr.tLogs.size())); logSystem->expectedLogSets++; } else { logSystem->logRouterTags = 0; @@ -1620,9 +1617,15 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted 0) { logSystem->tLogs.push_back( Reference( new LogSet() ) ); - logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorum; - logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor; - logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy; + 
if(recr.satelliteFallback) { + logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorumFallback; + logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactorFallback; + logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicyFallback; + } else { + logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorum; + logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor; + logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy; + } logSystem->tLogs[1]->isLocal = true; logSystem->tLogs[1]->locality = tagLocalitySatellite; logSystem->tLogs[1]->startVersion = oldLogSystem->knownCommittedVersion + 1; @@ -1710,7 +1713,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedrecruitmentID; req.storeType = configuration.tLogDataStoreType; req.recoverFrom = oldLogSystem->getLogSystemConfig(); - req.recoverAt = oldLogSystem->epochEndVersion.get(); + req.recoverAt = oldLogSystem->recoverAt.get(); req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; req.epoch = recoveryCount; req.locality = primaryLocality; @@ -1753,7 +1756,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedrecruitmentID; req.storeType = configuration.tLogDataStoreType; req.recoverFrom = oldLogSystem->getLogSystemConfig(); - req.recoverAt = oldLogSystem->epochEndVersion.get(); + req.recoverAt = oldLogSystem->recoverAt.get(); req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; req.epoch = recoveryCount; req.locality = tagLocalitySatellite; @@ -2045,11 +2048,11 @@ Future ILogSystem::recoverAndEndEpoch(Reference ILogSystem::fromLogSystemConfig( UID const& dbgid, struct LocalityData const& locality, struct LogSystemConfig const& conf, bool excludeRemote, bool usePreviousEpochEnd, Optional>> addActor ) { +Reference ILogSystem::fromLogSystemConfig( UID const& dbgid, struct LocalityData const& locality, struct LogSystemConfig const& conf, bool excludeRemote, bool useRecoveredAt, Optional>> addActor ) { if (conf.logSystemType == 0) return Reference(); else if (conf.logSystemType == 2) - return TagPartitionedLogSystem::fromLogSystemConfig( dbgid, locality, conf, excludeRemote, usePreviousEpochEnd, addActor ); + return TagPartitionedLogSystem::fromLogSystemConfig( dbgid, locality, conf, excludeRemote, useRecoveredAt, addActor ); else throw internal_error(); } @@ -2063,6 +2066,6 @@ Reference ILogSystem::fromOldLogSystemConfig( UID const& dbgid, stru throw internal_error(); } -Reference ILogSystem::fromServerDBInfo( UID const& dbgid, ServerDBInfo const& dbInfo, bool usePreviousEpochEnd, Optional>> addActor ) { - return fromLogSystemConfig( dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, usePreviousEpochEnd, addActor ); +Reference ILogSystem::fromServerDBInfo( UID const& dbgid, ServerDBInfo const& dbInfo, bool useRecoveredAt, Optional>> addActor ) { + return fromLogSystemConfig( dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, useRecoveredAt, addActor ); } diff --git a/fdbserver/WorkerInterface.h b/fdbserver/WorkerInterface.h index 7913bd52c2..99cf8c013b 100644 --- a/fdbserver/WorkerInterface.h +++ b/fdbserver/WorkerInterface.h @@ -227,7 +227,7 @@ struct SetMetricsLogRateRequest { struct EventLogRequest { bool getLastError; Standalone eventName; - ReplyPromise< Standalone > reply; + ReplyPromise< TraceEventFields > reply; EventLogRequest() : getLastError(true) {} explicit EventLogRequest( Standalone eventName ) : eventName( eventName ), getLastError( false ) {} diff --git 
a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index ce1a68ef29..9570f69955 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -311,7 +311,7 @@ ACTOR Future newTLogServers( Reference self, RecruitFromConfig self->dcId_locality[remoteDcId] = loc; } - Future fRemoteWorkers = brokenPromiseToNever( self->clusterController.recruitRemoteFromConfiguration.getReply( RecruitRemoteFromConfigurationRequest( self->configuration, remoteDcId, recr.tLogs.size() ) ) ); + Future fRemoteWorkers = brokenPromiseToNever( self->clusterController.recruitRemoteFromConfiguration.getReply( RecruitRemoteFromConfigurationRequest( self->configuration, remoteDcId, recr.tLogs.size() * std::max(1, self->configuration.desiredLogRouterCount / std::max(1, recr.tLogs.size())) ) ) ); Reference newLogSystem = wait( oldLogSystem->newEpoch( recr, fRemoteWorkers, self->configuration, self->cstate.myDBState.recoveryCount + 1, self->dcId_locality[recr.dcId], self->dcId_locality[remoteDcId], self->allTags, self->recruitmentStalled ) ); self->logSystem = newLogSystem; diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 324b440363..2407009cdd 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -349,6 +349,8 @@ public: NotifiedVersion oldestVersion; // See also storageVersion() NotifiedVersion durableVersion; // At least this version will be readable from storage after a power failure + int64_t versionLag; // An estimate for how many versions it takes for the data to move from the logs to this storage server + uint64_t logProtocol; Reference logSystem; @@ -366,6 +368,7 @@ public: AsyncMap watches; int64_t watchBytes; + int64_t numWatches; AsyncVar noRecentUpdates; double lastUpdate; @@ -400,9 +403,10 @@ public: struct Counters { CounterCollection cc; - Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried; + Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried, watchQueries; Counter bytesInput, bytesDurable, bytesFetched, mutationBytes; // Like bytesInput but without MVCC accounting + Counter mutations, setMutations, clearRangeMutations, atomicMutations; Counter updateBatches, updateVersions; Counter loops; @@ -415,10 +419,15 @@ public: finishedQueries("FinishedQueries", cc), rowsQueried("RowsQueried", cc), bytesQueried("BytesQueried", cc), + watchQueries("WatchQueries", cc), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), bytesFetched("BytesFetched", cc), mutationBytes("MutationBytes", cc), + mutations("Mutations", cc), + setMutations("SetMutations", cc), + clearRangeMutations("ClearRangeMutations", cc), + atomicMutations("AtomicMutations", cc), updateBatches("UpdateBatches", cc), updateVersions("UpdateVersions", cc), loops("Loops", cc) @@ -428,6 +437,7 @@ public: specialCounter(cc, "StorageVersion", [self](){ return self->storageVersion(); }); specialCounter(cc, "DurableVersion", [self](){ return self->durableVersion.get(); }); specialCounter(cc, "DesiredOldestVersion", [self](){ return self->desiredOldestVersion.get(); }); + specialCounter(cc, "VersionLag", [self](){ return self->versionLag; }); specialCounter(cc, "FetchKeysFetchActive", [self](){ return self->fetchKeysParallelismLock.activePermits(); }); specialCounter(cc, "FetchKeysWaiting", [self](){ return self->fetchKeysParallelismLock.waiters(); }); @@ -435,6 +445,8 @@ public: specialCounter(cc, "QueryQueueMax", 
[self](){ return self->getAndResetMaxQueryQueueSize(); }); specialCounter(cc, "BytesStored", [self](){ return self->metrics.byteSample.getEstimate(allKeys); }); + specialCounter(cc, "ActiveWatches", [self](){ return self->numWatches; }); + specialCounter(cc, "WatchBytes", [self](){ return self->watchBytes; }); specialCounter(cc, "KvstoreBytesUsed", [self](){ return self->storage.getStorageBytes().used; }); specialCounter(cc, "KvstoreBytesFree", [self](){ return self->storage.getStorageBytes().free; }); @@ -447,10 +459,11 @@ public: : instanceID(g_random->randomUniqueID().first()), storage(this, storage), db(db), lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0), + versionLag(0), updateEagerReads(0), shardChangeCounter(0), fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES), - shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), + shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0), logProtocol(0), counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()), readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")), behind(false), byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false), @@ -760,6 +773,8 @@ ACTOR Future getValueQ( StorageServer* data, GetValueRequest req ) { ACTOR Future watchValue_impl( StorageServer* data, WatchValueRequest req ) { try { + ++data->counters.watchQueries; + if( req.debugID.present() ) g_traceBatch.addEvent("WatchValueDebug", req.debugID.get().first(), "watchValueQ.Before"); //.detail("TaskID", g_network->getCurrentTask()); @@ -792,11 +807,14 @@ ACTOR Future watchValue_impl( StorageServer* data, WatchValueRequest req ) return Void(); } + ++data->numWatches; data->watchBytes += ( req.key.expectedSize() + req.value.expectedSize() + 1000 ); try { Void _ = wait( watchFuture ); + --data->numWatches; data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 ); } catch( Error &e ) { + --data->numWatches; data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 ); throw; } @@ -1809,6 +1827,9 @@ ACTOR Future fetchKeys( StorageServer *data, AddingShard* shard ) { state int debug_nextRetryToLog = 1; state bool isTooOld = false; + //FIXME: The client cache does not notice when servers are added to a team. To read from a local storage server we must refresh the cache manually. + data->cx->invalidateCache(keys); + loop { try { TEST(true); // Fetching keys for transferred shard @@ -1888,6 +1909,11 @@ ACTOR Future fetchKeys( StorageServer *data, AddingShard* shard ) { // Throw away deferred updates from before fetchVersion, since we don't need them to use blocks fetched at that version while (!shard->updates.empty() && shard->updates[0].version <= fetchVersion) shard->updates.pop_front(); + //FIXME: remove when we no longer support upgrades from 5.X + if(debug_getRangeRetries >= 100) { + data->cx->enableLocalityLoadBalance = false; + } + debug_getRangeRetries++; if (debug_nextRetryToLog==debug_getRangeRetries){ debug_nextRetryToLog += std::min(debug_nextRetryToLog, 1024); @@ -1902,6 +1928,9 @@ ACTOR Future fetchKeys( StorageServer *data, AddingShard* shard ) { } } + //FIXME: remove when we no longer support upgrades from 5.X + data->cx->enableLocalityLoadBalance = true; + // We have completed the fetch and write of the data, now we wait for MVCC window to pass. // As we have finished this work, we will allow more work to start... 
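Both the new VersionLag counter on the storage server and the data_lag object emitted by the role status code earlier in this patch come from the same version arithmetic: the storage server tracks how far its applied version trails the tlogs, and status converts that distance into seconds using the cluster's version rate. A small worked sketch of the conversion, with the knob values written as plain constants (the real code takes VERSIONS_PER_SECOND and STORAGE_LOGGING_DELAY from SERVER_KNOBS; 1e6 and 0.5 are assumptions here):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    using Version = int64_t;

    int main() {
        // Assumed knob values; the real code reads these from SERVER_KNOBS.
        const double VERSIONS_PER_SECOND = 1e6;
        const double STORAGE_LOGGING_DELAY = 0.5;

        Version lastTLogVersion = 7500000;  // max version known to the tlogs
        Version storageVersion  = 5000000;  // version this storage server has applied

        // Raw lag, as tracked by the storage server's VersionLag counter.
        Version versionLag = std::max<Version>(0, lastTLogVersion - storageVersion);

        // Status discounts the expected logging delay before reporting data_lag.
        double laggedVersions =
            std::max(0.0, versionLag - STORAGE_LOGGING_DELAY * VERSIONS_PER_SECOND);
        printf("data_lag: versions=%.0f seconds=%.2f\n",
               laggedVersions, laggedVersions / VERSIONS_PER_SECOND);
        return 0;
    }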
shard->fetchComplete.send(Void()); @@ -2354,6 +2383,7 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) ++data->counters.updateBatches; data->lastTLogVersion = cursor->getMaxKnownVersion(); + data->versionLag = std::max(0, data->lastTLogVersion - data->version.get()); ASSERT(*pReceivedUpdate == false); *pReceivedUpdate = true; @@ -2477,6 +2507,28 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) updater.applyMutation(data, msg, ver); data->counters.mutationBytes += msg.totalSize(); + ++data->counters.mutations; + switch(msg.type) { + case MutationRef::SetValue: + ++data->counters.setMutations; + break; + case MutationRef::ClearRange: + ++data->counters.clearRangeMutations; + break; + case MutationRef::AddValue: + case MutationRef::And: + case MutationRef::AndV2: + case MutationRef::AppendIfFits: + case MutationRef::ByteMax: + case MutationRef::ByteMin: + case MutationRef::Max: + case MutationRef::Min: + case MutationRef::MinV2: + case MutationRef::Or: + case MutationRef::Xor: + ++data->counters.atomicMutations; + break; + } } else TraceEvent(SevError, "DiscardingPeekedData", data->thisServerID).detail("Mutation", msg.toString()).detail("Version", cloneCursor2->version().toString()); @@ -3140,8 +3192,8 @@ ACTOR Future storageServerCore( StorageServer* self, StorageServerInterfac if( self->db->get().recoveryState >= RecoveryState::FULLY_RECOVERED ) { self->logSystem = ILogSystem::fromServerDBInfo( self->thisServerID, self->db->get() ); if (self->logSystem) { - if(self->logSystem->getLogSystemConfig().oldTLogs.size()) { - self->poppedAllAfter = self->logSystem->getLogSystemConfig().oldTLogs[0].epochEnd; + if(self->db->get().logSystemConfig.recoveredAt.present()) { + self->poppedAllAfter = self->db->get().logSystemConfig.recoveredAt.get(); } self->logCursor = self->logSystem->peekSingle( self->thisServerID, self->version.get() + 1, self->tag, self->history ); self->popVersion( self->durableVersion.get() + 1, true ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index dbe6057fcc..038b379f07 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -820,11 +820,11 @@ ACTOR Future workerServer( Reference connFile, Refe } } when( EventLogRequest req = waitNext(interf.eventLogRequest.getFuture()) ) { - Standalone e; + TraceEventFields e; if( req.getLastError ) - e = StringRef( latestEventCache.getLatestError() ); + e = latestEventCache.getLatestError(); else - e = StringRef( latestEventCache.get( req.eventName.toString() ) ); + e = latestEventCache.get( req.eventName.toString() ); req.reply.send(e); } when( TraceBatchDumpRequest req = waitNext(interf.traceBatchDumpRequest.getFuture()) ) { diff --git a/fdbserver/workloads/ConsistencyCheck.actor.cpp b/fdbserver/workloads/ConsistencyCheck.actor.cpp index 1cdceb5a4f..53128ccfee 100644 --- a/fdbserver/workloads/ConsistencyCheck.actor.cpp +++ b/fdbserver/workloads/ConsistencyCheck.actor.cpp @@ -369,29 +369,22 @@ struct ConsistencyCheckWorkload : TestWorkload ACTOR Future getKeyLocations(Database cx, vector>> shards, ConsistencyCheckWorkload *self, Promise>> keyLocationPromise) { state Standalone> keyLocations; - state Key beginKey = allKeys.begin; + state Key beginKey = allKeys.begin.withPrefix(keyServersPrefix); + state Key endKey = allKeys.end.withPrefix(keyServersPrefix); state int i = 0; //If the responses are too big, we may use multiple requests to get the key locations. 
Each request begins where the last left off for ( ; i < shards.size(); i++) { - // skip serverList shards - if (!shards[i].first.begin.startsWith(keyServersPrefix)) { - break; - } - - state Key endKey = shards[i].first.end.startsWith(keyServersPrefix) ? shards[i].first.end.removePrefix(keyServersPrefix) : allKeys.end; - - while(beginKey < endKey) + while(beginKey < shards[i].first.end) { try { Version version = wait(self->getVersion(cx, self)); GetKeyValuesRequest req; - Key prefixBegin = beginKey.withPrefix(keyServersPrefix); - req.begin = firstGreaterOrEqual(prefixBegin); - req.end = firstGreaterOrEqual(keyServersEnd); + req.begin = firstGreaterOrEqual(beginKey); + req.end = firstGreaterOrEqual(std::min(shards[i].first.end, endKey)); req.limit = SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT; req.limitBytes = SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT_BYTES; req.version = version; @@ -443,17 +436,26 @@ struct ConsistencyCheckWorkload : TestWorkload } auto keyValueResponse = keyValueFutures[firstValidStorageServer].get().get(); - Standalone currentLocations = krmDecodeRanges( keyServersPrefix, KeyRangeRef(beginKey, endKey), RangeResultRef( keyValueResponse.data, keyValueResponse.more) ); + Standalone currentLocations = krmDecodeRanges( keyServersPrefix, KeyRangeRef(beginKey.removePrefix(keyServersPrefix), std::min(shards[i].first.end, endKey).removePrefix(keyServersPrefix)), RangeResultRef( keyValueResponse.data, keyValueResponse.more) ); - //Push all but the last item, which will be pushed as the first item next iteration - keyLocations.append_deep(keyLocations.arena(), currentLocations.begin(), currentLocations.size() - 1); + if(keyValueResponse.data.size() && beginKey == keyValueResponse.data[0].key) { + keyLocations.push_back_deep(keyLocations.arena(), currentLocations[0]); + } + + if(currentLocations.size() > 2) { + keyLocations.append_deep(keyLocations.arena(), ¤tLocations[1], currentLocations.size() - 2); + } //Next iteration should pick up where we left off ASSERT(currentLocations.size() > 1); - beginKey = currentLocations.end()[-1].key; + if(!keyValueResponse.more) { + beginKey = shards[i].first.end; + } else { + beginKey = keyValueResponse.data.end()[-1].key; + } //If this is the last iteration, then push the allKeys.end KV pair - if(beginKey == allKeys.end) + if(beginKey >= endKey) keyLocations.push_back_deep(keyLocations.arena(), currentLocations.end()[-1]); } catch(Error &e) @@ -626,7 +628,7 @@ struct ConsistencyCheckWorkload : TestWorkload }*/ //In a quiescent database, check that the team size is the same as the desired team size - if(self->firstClient && self->performQuiescentChecks && sourceStorageServers.size() != configuration.storageTeamSize) + if(self->firstClient && self->performQuiescentChecks && sourceStorageServers.size() != configuration.usableRegions*configuration.storageTeamSize) { TraceEvent("ConsistencyCheck_InvalidTeamSize").detail("ShardBegin", printable(range.begin)).detail("ShardEnd", printable(range.end)).detail("TeamSize", sourceStorageServers.size()).detail("DesiredTeamSize", configuration.storageTeamSize); self->testFailure("Invalid team size"); @@ -971,7 +973,7 @@ struct ConsistencyCheckWorkload : TestWorkload //Min and max shard sizes have a 3 * shardBounds.permittedError.bytes cushion for error since shard sizes are not precise //Shard splits ignore the first key in a shard, so its size shouldn't be considered when checking the upper bound //0xff shards are not checked - if( canSplit && self->performQuiescentChecks && !range.begin.startsWith(keyServersPrefix) && + if( 
canSplit && sampledKeys > 5 && self->performQuiescentChecks && !range.begin.startsWith(keyServersPrefix) && (sampledBytes < shardBounds.min.bytes - 3 * shardBounds.permittedError.bytes || sampledBytes - firstKeySampledBytes > shardBounds.max.bytes + 3 * shardBounds.permittedError.bytes)) { TraceEvent("ConsistencyCheck_InvalidShardSize").detail("Min", shardBounds.min.bytes).detail("Max", shardBounds.max.bytes).detail("Size", shardBytes) diff --git a/fdbserver/workloads/DDMetrics.actor.cpp b/fdbserver/workloads/DDMetrics.actor.cpp index a956afd3b2..0483b6de00 100644 --- a/fdbserver/workloads/DDMetrics.actor.cpp +++ b/fdbserver/workloads/DDMetrics.actor.cpp @@ -41,10 +41,10 @@ struct DDMetricsWorkload : TestWorkload { WorkerInterface masterWorker = wait(getMasterWorker(cx, self->dbInfo)); TraceEvent("GetHighPriorityReliocationsInFlight").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster"); - Standalone md = wait( timeoutError(masterWorker.eventLogRequest.getReply( + TraceEventFields md = wait( timeoutError(masterWorker.eventLogRequest.getReply( EventLogRequest( StringRef( cx->dbName.toString() + "/MovingData" ) ) ), 1.0 ) ); int relocations; - sscanf(extractAttribute(md.toString(), "HighPriorityRelocations").c_str(), "%d", &relocations); + sscanf(md.getValue("HighPriorityRelocations").c_str(), "%d", &relocations); return relocations; } diff --git a/fdbserver/workloads/WorkerErrors.actor.cpp b/fdbserver/workloads/WorkerErrors.actor.cpp index b575901b3a..2b659011e8 100644 --- a/fdbserver/workloads/WorkerErrors.actor.cpp +++ b/fdbserver/workloads/WorkerErrors.actor.cpp @@ -42,17 +42,17 @@ struct WorkerErrorsWorkload : TestWorkload { virtual void getMetrics( vector& m ) {} - ACTOR Future< std::vector< std::string > > latestEventOnWorkers( std::vector> workers ) { - state vector>> eventTraces; + ACTOR Future< std::vector< TraceEventFields > > latestEventOnWorkers( std::vector> workers ) { + state vector> eventTraces; for(int c = 0; c < workers.size(); c++) { eventTraces.push_back( workers[c].first.eventLogRequest.getReply( EventLogRequest() ) ); } Void _ = wait( timeoutError( waitForAll( eventTraces ), 2.0 ) ); - vector results; + vector results; for(int i = 0; i < eventTraces.size(); i++) { - results.push_back( eventTraces[i].get().toString() ); + results.push_back( eventTraces[i].get() ); } return results; @@ -60,9 +60,9 @@ struct WorkerErrorsWorkload : TestWorkload { ACTOR Future _start(Database cx, WorkerErrorsWorkload *self) { state vector> workers = wait( getWorkers( self->dbInfo ) ); - std::vector errors = wait( self->latestEventOnWorkers( workers ) ); + std::vector errors = wait( self->latestEventOnWorkers( workers ) ); for(auto e : errors) { - printf("%s\n", e.c_str()); + printf("%s\n", e.toString().c_str()); } return Void(); } diff --git a/flow/FileTraceLogWriter.cpp b/flow/FileTraceLogWriter.cpp new file mode 100644 index 0000000000..069b727164 --- /dev/null +++ b/flow/FileTraceLogWriter.cpp @@ -0,0 +1,187 @@ +/* + * FileTraceLogWriter.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "FileTraceLogWriter.h" +#include "flow.h" +#include "ThreadHelper.actor.h" + +#if defined(__unixish__) +#define __open ::open +#define __write ::write +#define __close ::close +#define __fsync ::fsync +#define TRACEFILE_FLAGS O_WRONLY | O_CREAT | O_EXCL +#define TRACEFILE_MODE 0664 +#elif defined(_WIN32) +#include +#undef max +#undef min +#include +#include +#include +#define __open _open +#define __write _write +#define __close _close +#define __fsync _commit +#define TRACEFILE_FLAGS _O_WRONLY | _O_CREAT | _O_EXCL +#define TRACEFILE_MODE _S_IWRITE +#endif + +#include + +FileTraceLogWriter::FileTraceLogWriter(std::string directory, std::string processName, std::string basename, std::string extension, uint64_t maxLogsSize, std::function onError) + : directory(directory), processName(processName), basename(basename), extension(extension), maxLogsSize(maxLogsSize), traceFileFD(-1), index(0), onError(onError) {} + +void FileTraceLogWriter::addref() { + ReferenceCounted::addref(); +} + +void FileTraceLogWriter::delref() { + ReferenceCounted::delref(); +} + +void FileTraceLogWriter::lastError(int err) { + // Whenever we get a serious error writing a trace log, all flush barriers posted between the operation encountering + // the error and the occurrence of the error are unblocked, even though we haven't actually succeeded in flushing. + // Otherwise a permanent write error would make the program block forever. 
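The comment below describes the flush-barrier contract for the new writer: a persistent write error must release every pending flush barrier so callers are never blocked forever on a flush that cannot succeed. The following std-only sketch illustrates that contract; FlushBarrier and BarrierReleasingWriter are illustrative stand-ins for flow's BarrierList/ThreadFuture machinery, and triggerAll() plays the role of the barriers->triggerAll() callback that TraceLog::open() passes to FileTraceLogWriter later in this diff.

#include <cerrno>
#include <condition_variable>
#include <mutex>
#include <vector>

// One outstanding flush request; flush() waits on it, the writer triggers it.
struct FlushBarrier {
    std::mutex m;
    std::condition_variable cv;
    bool done = false;
    void trigger() { { std::lock_guard<std::mutex> l(m); done = true; } cv.notify_all(); }
    void wait()    { std::unique_lock<std::mutex> l(m); cv.wait(l, [this]{ return done; }); }
};

struct BarrierReleasingWriter {
    std::vector<FlushBarrier*> pending;   // barriers posted since the last successful flush

    void triggerAll() {                   // stands in for BarrierList::triggerAll() in this patch
        for (auto* b : pending) b->trigger();
        pending.clear();
    }

    void lastError(int err) {
        // EINTR is simply retried by the write loop; any other error releases the
        // pending barriers so callers blocked in flush() are not stuck forever.
        if (err != 0 && err != EINTR) triggerAll();
    }
};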
+ if (err != 0 && err != EINTR) { + onError(); + } +} + +void FileTraceLogWriter::write(const std::string& str) { + auto ptr = str.c_str(); + int remaining = str.size(); + + while ( remaining ) { + int ret = __write( traceFileFD, ptr, remaining ); + if ( ret > 0 ) { + lastError(0); + remaining -= ret; + ptr += ret; + } else { + lastError(errno); + threadSleep(0.1); + } + } +} + +void FileTraceLogWriter::open() { + cleanupTraceFiles(); + + auto finalname = format("%s.%d.%s", basename.c_str(), ++index, extension.c_str()); + while ( (traceFileFD = __open( finalname.c_str(), TRACEFILE_FLAGS, TRACEFILE_MODE )) == -1 ) { + lastError(errno); + if (errno == EEXIST) + finalname = format("%s.%d.%s", basename.c_str(), ++index, extension.c_str()); + else { + fprintf(stderr, "ERROR: could not create trace log file `%s' (%d: %s)\n", finalname.c_str(), errno, strerror(errno)); + + int errorNum = errno; + onMainThreadVoid([finalname, errorNum]{ + TraceEvent(SevWarnAlways, "TraceFileOpenError") + .detail("Filename", finalname) + .detail("ErrorCode", errorNum) + .detail("Error", strerror(errorNum)) + .trackLatest("TraceFileOpenError"); }, NULL); + threadSleep(FLOW_KNOBS->TRACE_RETRY_OPEN_INTERVAL); + } + } + onMainThreadVoid([]{ latestEventCache.clear("TraceFileOpenError"); }, NULL); + lastError(0); +} + +void FileTraceLogWriter::close() { + if (traceFileFD >= 0) { + while ( __close(traceFileFD) ) threadSleep(0.1); + } +} + +void FileTraceLogWriter::roll() { + close(); + open(); +} + +void FileTraceLogWriter::sync() { + __fsync(traceFileFD); +} + +void FileTraceLogWriter::extractTraceFileNameInfo(std::string const& filename, std::string &root, int &index) { + int split = filename.find_last_of('.', filename.size() - 5); + root = filename.substr(0, split); + if(sscanf(filename.substr(split + 1, filename.size() - split - 4).c_str(), "%d", &index) == EOF) { + index = -1; + } +} + +bool FileTraceLogWriter::compareTraceFileName (std::string const& f1, std::string const& f2) { + std::string root1; + std::string root2; + + int index1; + int index2; + + extractTraceFileNameInfo(f1, root1, index1); + extractTraceFileNameInfo(f2, root2, index2); + + if(root1 != root2) + return root1 < root2; + if(index1 != index2) + return index1 < index2; + + return f1 < f2; +} + +bool FileTraceLogWriter::reverseCompareTraceFileName(std::string f1, std::string f2) { + return compareTraceFileName(f2, f1); +} + +void FileTraceLogWriter::cleanupTraceFiles() { + // Setting maxLogsSize=0 disables trace file cleanup based on dir size + if(!g_network->isSimulated() && maxLogsSize > 0) { + try { + std::vector existingFiles = platform::listFiles(directory, extension); + std::vector existingTraceFiles; + + for(auto f = existingFiles.begin(); f != existingFiles.end(); ++f) { + if(f->substr(0, processName.length()) == processName) { + existingTraceFiles.push_back(*f); + } + } + + // reverse sort, so we preserve the most recent files and delete the oldest + std::sort(existingTraceFiles.begin(), existingTraceFiles.end(), FileTraceLogWriter::reverseCompareTraceFileName); + + int64_t runningTotal = 0; + std::vector::iterator fileListIterator = existingTraceFiles.begin(); + + while(runningTotal < maxLogsSize && fileListIterator != existingTraceFiles.end()) { + runningTotal += (fileSize(joinPath(directory, *fileListIterator)) + FLOW_KNOBS->ZERO_LENGTH_FILE_PAD); + ++fileListIterator; + } + + while(fileListIterator != existingTraceFiles.end()) { + deleteFile(joinPath(directory, *fileListIterator)); + ++fileListIterator; + } + } catch( Error & ) {} + } 
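cleanupTraceFiles() above keeps the most recent trace files until their accumulated size exceeds maxLogsSize and deletes everything older. A condensed, std-only view of that retention policy follows; the file names and sizes are hypothetical, the real code walks the directory with platform::listFiles()/fileSize(), adds a ZERO_LENGTH_FILE_PAD per file, and orders files with compareTraceFileName(), which parses the numeric index so "trace....10.xml" sorts after "trace....9.xml" (the plain string comparison below is a simplification).

#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

std::vector<std::string> filesToDelete(std::vector<std::pair<std::string, int64_t>> files, // (name, bytes)
                                       int64_t maxLogsSize) {
    // Reverse sort so the most recent files come first and are preserved.
    std::sort(files.begin(), files.end(),
              [](auto const& a, auto const& b) { return a.first > b.first; });

    std::vector<std::string> doomed;
    int64_t runningTotal = 0;
    for (auto const& f : files) {
        if (runningTotal < maxLogsSize)
            runningTotal += f.second;   // still inside the budget: keep this file
        else
            doomed.push_back(f.first);  // budget exhausted: everything older is deleted
    }
    return doomed;
}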
+} diff --git a/flow/FileTraceLogWriter.h b/flow/FileTraceLogWriter.h new file mode 100644 index 0000000000..423642849a --- /dev/null +++ b/flow/FileTraceLogWriter.h @@ -0,0 +1,65 @@ +/* + * FileTraceLogWriter.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef FLOW_FILE_TRACE_LOG_WRITER_H +#define FLOW_FILE_TRACE_LOG_WRITER_H +#pragma once + +#include "FastRef.h" +#include "Trace.h" + +#include + +class FileTraceLogWriter : public ITraceLogWriter, ReferenceCounted { +private: + std::string directory; + std::string processName; + std::string basename; + std::string extension; + + uint64_t maxLogsSize; + int traceFileFD; + int index; + + std::function onError; + +public: + FileTraceLogWriter(std::string directory, std::string processName, std::string basename, std::string extension, uint64_t maxLogsSize, std::function onError); + + void addref(); + void delref(); + + void lastError(int err); + + void write(const std::string& str); + void open(); + void close(); + void roll(); + void sync(); + + static void extractTraceFileNameInfo(std::string const& filename, std::string &root, int &index); + static bool compareTraceFileName (std::string const& f1, std::string const& f2); + static bool reverseCompareTraceFileName(std::string f1, std::string f2); + + void cleanupTraceFiles(); +}; + +#endif diff --git a/flow/Knobs.cpp b/flow/Knobs.cpp index 248c2d6f17..2845769a89 100644 --- a/flow/Knobs.cpp +++ b/flow/Knobs.cpp @@ -115,10 +115,12 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) { init( TRACE_RETRY_OPEN_INTERVAL, 1.00 ); init( MIN_TRACE_SEVERITY, isSimulated ? 
0 : 10 ); // Related to the trace severity in Trace.h init( MAX_TRACE_SUPPRESSIONS, 1e4 ); - init( TRACE_FSYNC_ENABLED, 0 ); - init( TRACE_EVENT_METRIC_UNITS_PER_SAMPLE, 500 ); - init( TRACE_EVENT_THROTLLER_SAMPLE_EXPIRY, 1800.0 ); // 30 mins - init( TRACE_EVENT_THROTTLER_MSG_LIMIT, 20000 ); + init( TRACE_SYNC_ENABLED, 0 ); + init( TRACE_EVENT_METRIC_UNITS_PER_SAMPLE, 500 ); + init( TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY, 1800.0 ); // 30 mins + init( TRACE_EVENT_THROTTLER_MSG_LIMIT, 20000 ); + init( TRACE_EVENT_MAX_SIZE, 4000 ); + init( TRACE_LOG_MAX_PREOPEN_BUFFER, 1000000 ); //TDMetrics init( MAX_METRICS, 600 ); diff --git a/flow/Knobs.h b/flow/Knobs.h index fa12bc409c..b41e046eab 100644 --- a/flow/Knobs.h +++ b/flow/Knobs.h @@ -137,10 +137,12 @@ public: double TRACE_RETRY_OPEN_INTERVAL; int MIN_TRACE_SEVERITY; int MAX_TRACE_SUPPRESSIONS; - int TRACE_FSYNC_ENABLED; + int TRACE_SYNC_ENABLED; int TRACE_EVENT_METRIC_UNITS_PER_SAMPLE; - int TRACE_EVENT_THROTLLER_SAMPLE_EXPIRY; + int TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY; int TRACE_EVENT_THROTTLER_MSG_LIMIT; + int TRACE_EVENT_MAX_SIZE; + int TRACE_LOG_MAX_PREOPEN_BUFFER; //TDMetrics int64_t MAX_METRIC_SIZE; diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 5a1f913a7e..d7f5c6e897 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2317,6 +2317,7 @@ extern void flushTraceFileVoid(); extern "C" void flushAndExit(int exitCode) { flushTraceFileVoid(); fflush(stdout); + closeTraceFile(); #ifdef _WIN32 // This function is documented as being asynchronous, but we suspect it might actually be synchronous in the // case that it is passed a handle to the current process. If not, then there may be cases where we escalate diff --git a/flow/Platform.h b/flow/Platform.h index d725fdb0cf..dcaaaabd10 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -501,6 +501,16 @@ inline static void* aligned_alloc(size_t alignment, size_t size) { return memali #elif defined(__APPLE__) #include inline static void* aligned_alloc(size_t alignment, size_t size) { + // Linux's aligned_alloc() requires alignment to be a power of 2. While posix_memalign() + // also requires this, in addition it requires alignment to be a multiple of sizeof(void *). + // Rather than add this requirement to the platform::aligned_alloc() interface we will simply + // upgrade powers of 2 which are less than sizeof(void *) to be exactly sizeof(void *). Non + // powers of 2 of any size will fail as they would on other platforms. This change does not + // break the platform::aligned_alloc() contract as all addresses which are aligned to + // sizeof(void *) are also aligned to any power of 2 less than sizeof(void *). 
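The Platform.h comment below explains why the Apple implementation bumps small power-of-two alignments up to sizeof(void *): posix_memalign() additionally requires the alignment to be a multiple of the pointer size. A minimal standalone illustration of that rule (renamed portable_aligned_alloc here so it does not collide with the C11 aligned_alloc, and assuming a POSIX libc):

#include <cstddef>
#include <cstdio>
#include <cstdlib>

static void* portable_aligned_alloc(size_t alignment, size_t size) {
    // Powers of two smaller than sizeof(void *) are upgraded; anything that is
    // already sizeof(void *)-aligned is also aligned to those smaller powers of two,
    // so the contract is unchanged. Non powers of two still fail, as on other platforms.
    if (alignment != 0 && alignment < sizeof(void*) && (alignment & (alignment - 1)) == 0)
        alignment = sizeof(void*);          // e.g. 1, 2 or 4 become 8 on a 64-bit build
    void* ptr = nullptr;
    posix_memalign(&ptr, alignment, size);  // returns nonzero and leaves ptr null on bad alignment
    return ptr;
}

int main() {
    void* p = portable_aligned_alloc(2, 64); // would fail on macOS without the upgrade
    printf("allocated at %p\n", p);
    free(p);
    return 0;
}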
+ if(alignment != 0 && alignment < sizeof(void *) && (alignment & (alignment - 1)) == 0) { + alignment = sizeof(void *); + } void* ptr = nullptr; posix_memalign(&ptr, alignment, size); return ptr; diff --git a/flow/Trace.cpp b/flow/Trace.cpp index 56f2429457..740bd89c39 100644 --- a/flow/Trace.cpp +++ b/flow/Trace.cpp @@ -20,15 +20,12 @@ #include "Trace.h" +#include "FileTraceLogWriter.h" +#include "XmlTraceLogFormatter.h" #include "flow.h" #include "DeterministicRandom.h" #include #include -#ifdef WIN32 -#include -#undef max -#undef min -#endif #include #include "IThreadPool.h" @@ -38,24 +35,10 @@ #include "TDMetric.actor.h" #include "MetricSample.h" -#include -#if defined(__unixish__) -#define __open ::open -#define __write ::write -#define __close ::close -#define __fsync ::fsync -#define TRACEFILE_FLAGS O_WRONLY | O_CREAT | O_EXCL -#define TRACEFILE_MODE 0664 -#elif defined(_WIN32) -#include -#include -#include -#define __open _open -#define __write _write -#define __close _close -#define __fsync _commit -#define TRACEFILE_FLAGS _O_WRONLY | _O_CREAT | _O_EXCL -#define TRACEFILE_MODE _S_IWRITE +#ifdef _WIN32 +#include +#undef max +#undef min #endif class DummyThreadPool : public IThreadPool, ReferenceCounted { @@ -105,6 +88,7 @@ struct SuppressionMap { // Returns -1 if this event is suppressed int64_t checkAndInsertSuppression(std::string type, double duration) { + ASSERT(g_network); if(suppressionMap.size() >= FLOW_KNOBS->MAX_TRACE_SUPPRESSIONS) { TraceEvent(SevWarnAlways, "ClearingTraceSuppressionMap"); suppressionMap.clear(); @@ -135,53 +119,36 @@ SuppressionMap suppressedEvents; static TransientThresholdMetricSample> *traceEventThrottlerCache; static const char *TRACE_EVENT_THROTTLE_STARTING_TYPE = "TraceEventThrottle_"; -void badField(const char *key, const char *type) { - fprintf(stderr, "Invalid trace event detail name: Type=%s, Field=%s\n", type, key); - //ASSERT_WE_THINK(false); -} - -void validateFieldName(const char *key, const char *type, bool allowUnderscores=false) { - if(g_network && g_network->isSimulated()) { - if((key[0] < 'A' || key[0] > 'Z') && key[0] != '_') { - badField(key, type); - return; - } - - const char* underscore = strchr(key, '_'); - while(underscore) { - if(!allowUnderscores || ((underscore[1] < 'A' || underscore[1] > 'Z') && key[0] != '_' && key[0] != '\0')) { - badField(key, type); - return; - } - - underscore = strchr(&underscore[1], '_'); - } - } -} - struct TraceLog { - Standalone< VectorRef > buffer; - int file_length; - int buffer_length; + +private: + Reference logWriter; + Reference formatter; + std::vector eventBuffer; + int loggedLength; + int bufferLength; bool opened; + int64_t preopenOverflowCount; std::string basename; std::string logGroup; std::string directory; std::string processName; - NetworkAddress localAddress; + Optional localAddress; Reference writer; uint64_t rollsize; Mutex mutex; - bool logTraceEventMetrics; EventMetricHandle SevErrorNames; EventMetricHandle SevWarnAlwaysNames; EventMetricHandle SevWarnNames; EventMetricHandle SevInfoNames; EventMetricHandle SevDebugNames; +public: + bool logTraceEventMetrics; + void initMetrics() { SevErrorNames.init(LiteralStringRef("TraceEvents.SevError")); @@ -224,84 +191,38 @@ struct TraceLog { Reference barriers; struct WriterThread : IThreadPoolReceiver { - WriterThread( std::string directory, std::string processName, uint32_t maxLogsSize, std::string basename, Reference barriers ) : directory(directory), processName(processName), maxLogsSize(maxLogsSize), basename(basename), 
traceFileFD(0), index(0), barriers(barriers) {} + WriterThread( Reference barriers, Reference logWriter, Reference formatter ) + : barriers(barriers), logWriter(logWriter), formatter(formatter) {} virtual void init() {} + Reference logWriter; + Reference formatter; Reference barriers; - int traceFileFD; - std::string directory; - std::string processName; - int maxLogsSize; - std::string basename; - int index; - - void lastError(int err) { - // Whenever we get a serious error writing a trace log, all flush barriers posted between the operation encountering - // the error and the occurrence of the error are unblocked, even though we haven't actually succeeded in flushing. - // Otherwise a permanent write error would make the program block forever. - if (err != 0 && err != EINTR) { - barriers->triggerAll(); - } - } - - void writeReliable(const uint8_t* buf, int count) { - auto ptr = buf; - int remaining = count; - - while ( remaining ) { - int ret = __write( traceFileFD, ptr, remaining ); - if ( ret > 0 ) { - lastError(0); - remaining -= ret; - ptr += ret; - } else { - lastError(errno); - threadSleep(0.1); - } - } - } - void writeReliable(const char* msg) { - writeReliable( (const uint8_t*)msg, strlen(msg) ); - } - - void handleTraceFileOpenError(const std::string& filename) - { - } struct Open : TypedAction { virtual double getTimeEstimate() { return 0; } }; void action( Open& o ) { - if (traceFileFD) { - writeReliable(""); - while ( __close(traceFileFD) ) threadSleep(0.1); - } + logWriter->open(); + logWriter->write(formatter->getHeader()); + } - cleanupTraceFiles(); + struct Close : TypedAction { + virtual double getTimeEstimate() { return 0; } + }; + void action( Close& c ) { + logWriter->write(formatter->getFooter()); + logWriter->close(); + } - auto finalname = format("%s.%d.xml", basename.c_str(), ++index); - while ( (traceFileFD = __open( finalname.c_str(), TRACEFILE_FLAGS, TRACEFILE_MODE )) == -1 ) { - lastError(errno); - if (errno == EEXIST) - finalname = format("%s.%d.xml", basename.c_str(), ++index); - else { - fprintf(stderr, "ERROR: could not create trace log file `%s' (%d: %s)\n", finalname.c_str(), errno, strerror(errno)); - - int errorNum = errno; - onMainThreadVoid([finalname, errorNum]{ - TraceEvent(SevWarnAlways, "TraceFileOpenError") - .detail("Filename", finalname) - .detail("ErrorCode", errorNum) - .detail("Error", strerror(errorNum)) - .trackLatest("TraceFileOpenError"); }, NULL); - threadSleep(FLOW_KNOBS->TRACE_RETRY_OPEN_INTERVAL); - } - } - onMainThreadVoid([]{ latestEventCache.clear("TraceFileOpenError"); }, NULL); - lastError(0); - - writeReliable( "\r\n\r\n" ); + struct Roll : TypedAction { + virtual double getTimeEstimate() { return 0; } + }; + void action( Roll& c ) { + logWriter->write(formatter->getFooter()); + logWriter->roll(); + logWriter->write(formatter->getHeader()); } struct Barrier : TypedAction { @@ -312,116 +233,106 @@ struct TraceLog { } struct WriteBuffer : TypedAction { - Standalone< VectorRef > buffer; + std::vector events; - WriteBuffer( Standalone< VectorRef > buffer ) : buffer(buffer) {} + WriteBuffer(std::vector events) : events(events) {} virtual double getTimeEstimate() { return .001; } }; void action( WriteBuffer& a ) { - if ( traceFileFD ) { - for ( auto i = a.buffer.begin(); i != a.buffer.end(); ++i ) - writeReliable( i->begin(), i->size() ); - - if(FLOW_KNOBS->TRACE_FSYNC_ENABLED) { - __fsync( traceFileFD ); - } - - a.buffer = Standalone< VectorRef >(); + for(auto event : a.events) { + event.validateFormat(); + 
logWriter->write(formatter->formatEvent(event)); } - } - void cleanupTraceFiles() { - // Setting maxLogsSize=0 disables trace file cleanup based on dir size - if(!g_network->isSimulated() && maxLogsSize > 0) { - try { - std::vector existingFiles = platform::listFiles(directory, ".xml"); - std::vector existingTraceFiles; - - for(auto f = existingFiles.begin(); f != existingFiles.end(); ++f) - if(f->substr(0, processName.length()) == processName) - existingTraceFiles.push_back(*f); - - // reverse sort, so we preserve the most recent files and delete the oldest - std::sort(existingTraceFiles.begin(), existingTraceFiles.end(), TraceLog::reverseCompareTraceFileName); - - int64_t runningTotal = 0; - std::vector::iterator fileListIterator = existingTraceFiles.begin(); - - while(runningTotal < maxLogsSize && fileListIterator != existingTraceFiles.end()) { - runningTotal += (fileSize(joinPath(directory, *fileListIterator)) + FLOW_KNOBS->ZERO_LENGTH_FILE_PAD); - ++fileListIterator; - } - - while(fileListIterator != existingTraceFiles.end()) { - deleteFile(joinPath(directory, *fileListIterator)); - ++fileListIterator; - } - } catch( Error & ) {} + if(FLOW_KNOBS->TRACE_SYNC_ENABLED) { + logWriter->sync(); } } }; - TraceLog() : buffer_length(0), file_length(0), opened(false), barriers(new BarrierList), logTraceEventMetrics(false) {} + TraceLog() : bufferLength(0), loggedLength(0), opened(false), preopenOverflowCount(0), barriers(new BarrierList), logTraceEventMetrics(false), formatter(new XmlTraceLogFormatter()) {} bool isOpen() const { return opened; } - void open( std::string const& directory, std::string const& processName, std::string const& timestamp, uint64_t rs, uint64_t maxLogsSize, NetworkAddress na ) { + void open( std::string const& directory, std::string const& processName, std::string logGroup, std::string const& timestamp, uint64_t rs, uint64_t maxLogsSize, Optional na ) { ASSERT( !writer && !opened ); this->directory = directory; this->processName = processName; + this->logGroup = logGroup; this->localAddress = na; basename = format("%s/%s.%s.%s", directory.c_str(), processName.c_str(), timestamp.c_str(), g_random->randomAlphaNumeric(6).c_str()); + logWriter = Reference(new FileTraceLogWriter(directory, processName, basename, formatter->getExtension(), maxLogsSize, [this](){ barriers->triggerAll(); })); if ( g_network->isSimulated() ) writer = Reference(new DummyThreadPool()); else writer = createGenericThreadPool(); - writer->addThread( new WriterThread(directory, processName, maxLogsSize, basename, barriers) ); + writer->addThread( new WriterThread(barriers, logWriter, formatter) ); rollsize = rs; auto a = new WriterThread::Open; writer->post(a); + MutexHolder holder(mutex); + if(g_network->isSimulated()) { + // We don't support early trace logs in simulation. + // This is because we don't know if we're being simulated prior to the network being created, which causes two ambiguities: + // + // 1. We need to employ two different methods to determine the time of an event prior to the network starting for real-world and simulated runs. + // 2. Simulated runs manually insert the Machine field at TraceEvent creation time. Real-world runs add this field at write time. + // + // Without the ability to resolve the ambiguity, we've chosen to always favor the real-world approach and not support such events in simulation. 
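The comment below explains why simulation discards events buffered before the trace file is opened; for real processes, writeEvent() (later in this hunk) holds early events in memory and starts counting overflow once the buffer exceeds TRACE_LOG_MAX_PREOPEN_BUFFER. A condensed std-only view of that pre-open gating, with the knob's default hard-coded for illustration:

#include <string>
#include <vector>

struct PreopenBuffer {
    bool opened = false;
    size_t bufferedBytes = 0;
    long preopenOverflowCount = 0;
    std::vector<std::string> events;
    static constexpr size_t kMaxPreopenBuffer = 1000000; // TRACE_LOG_MAX_PREOPEN_BUFFER default

    void writeEvent(std::string serialized) {
        // Before the log is opened, events are held in memory; once the buffer fills,
        // further events are counted and dropped rather than growing without bound.
        // open() later reports the drop count in a TraceLogPreopenOverflow event.
        if (!opened && (preopenOverflowCount > 0 ||
                        bufferedBytes + serialized.size() > kMaxPreopenBuffer)) {
            ++preopenOverflowCount;
            return;
        }
        bufferedBytes += serialized.size();
        events.push_back(std::move(serialized));
    }
};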
+ eventBuffer.clear(); + } + + for(TraceEventFields &fields : eventBuffer) { + annotateEvent(fields); + } + opened = true; + if(preopenOverflowCount > 0) { + TraceEvent(SevWarn, "TraceLogPreopenOverflow").detail("OverflowEventCount", preopenOverflowCount); + preopenOverflowCount = 0; + } } - static void extractTraceFileNameInfo(std::string const& filename, std::string &root, int &index) { - int split = filename.find_last_of('.', filename.size() - 5); - root = filename.substr(0, split); - if(sscanf(filename.substr(split + 1, filename.size() - split - 4).c_str(), "%d", &index) == EOF) - index = -1; + void annotateEvent( TraceEventFields &fields ) { + if(localAddress.present()) { + fields.addField("Machine", format("%d.%d.%d.%d:%d", (localAddress.get().ip>>24)&0xff, (localAddress.get().ip>>16)&0xff, (localAddress.get().ip>>8)&0xff, localAddress.get().ip&0xff, localAddress.get().port)); + } + + fields.addField("LogGroup", logGroup); } - static bool compareTraceFileName (std::string const& f1, std::string const& f2) { - std::string root1; - std::string root2; - - int index1; - int index2; - - extractTraceFileNameInfo(f1, root1, index1); - extractTraceFileNameInfo(f2, root2, index2); - - if(root1 != root2) - return root1 < root2; - if(index1 != index2) - return index1 < index2; - - return f1 < f2; - } - - static bool reverseCompareTraceFileName(std::string f1, std::string f2) { - return compareTraceFileName(f2, f1); - } - - void write( const void* data, int length ) { + void writeEvent( TraceEventFields fields, std::string trackLatestKey, bool trackError ) { MutexHolder hold(mutex); + + if(opened) { + annotateEvent(fields); + } + + if(!trackLatestKey.empty()) { + fields.addField("TrackLatestType", "Original"); + } + + if(!isOpen() && (preopenOverflowCount > 0 || bufferLength + fields.sizeBytes() > FLOW_KNOBS->TRACE_LOG_MAX_PREOPEN_BUFFER)) { + ++preopenOverflowCount; + return; + } + // FIXME: What if we are using way too much memory for buffer? - buffer.push_back_deep( buffer.arena(), StringRef((const uint8_t*)data,length) ); - buffer_length += length; + eventBuffer.push_back(fields); + bufferLength += fields.sizeBytes(); + + if(trackError) { + latestEventCache.setLatestError(fields); + } + if(!trackLatestKey.empty()) { + latestEventCache.set(trackLatestKey, fields); + } } void log(int severity, const char *name, UID id, uint64_t event_ts) @@ -453,34 +364,43 @@ struct TraceLog { MutexHolder hold(mutex); bool roll = false; - if (!buffer.size()) return Void(); // SOMEDAY: maybe we still roll the tracefile here? + if (!eventBuffer.size()) return Void(); // SOMEDAY: maybe we still roll the tracefile here? 
- if (rollsize && buffer_length + file_length > rollsize) // SOMEDAY: more conditions to roll + if (rollsize && bufferLength + loggedLength > rollsize) // SOMEDAY: more conditions to roll roll = true; - auto a = new WriterThread::WriteBuffer( std::move(buffer) ); - file_length += buffer_length; - buffer = Standalone>(); - buffer_length = 0; + auto a = new WriterThread::WriteBuffer( std::move(eventBuffer) ); + loggedLength += bufferLength; + eventBuffer = std::vector(); + bufferLength = 0; writer->post( a ); if (roll) { - std::vector events = latestEventCache.getAllUnsafe(); + auto o = new WriterThread::Roll; + writer->post(o); + + std::vector events = latestEventCache.getAllUnsafe(); for (int idx = 0; idx < events.size(); idx++) { if(events[idx].size() > 0) { - int timeIndex = events[idx].find(" Time="); - ASSERT(timeIndex != events[idx].npos); + TraceEventFields rolledFields; + for(auto itr = events[idx].begin(); itr != events[idx].end(); ++itr) { + if(itr->first == "Time") { + rolledFields.addField("Time", format("%.6f", (g_trace_clock == TRACE_CLOCK_NOW) ? now() : timer())); + rolledFields.addField("OriginalTime", itr->second); + } + else if(itr->first == "TrackLatestType") { + rolledFields.addField("TrackLatestType", "Rolled"); + } + else { + rolledFields.addField(itr->first, itr->second); + } + } - double time = g_trace_clock == TRACE_CLOCK_NOW ? now() : timer(); - std::string rolledEvent = format("%s Time=\"%.6f\" Original%s", events[idx].substr(0, timeIndex).c_str(), time, events[idx].substr(timeIndex+1).c_str()); - - buffer.push_back_deep( buffer.arena(), StringRef(rolledEvent) ); + eventBuffer.push_back(rolledFields); } } - auto o = new WriterThread::Open; - file_length = 0; - writer->post( o ); + loggedLength = 0; } ThreadFuture f(new ThreadSingleAssignmentVar); @@ -492,14 +412,23 @@ struct TraceLog { void close() { if (opened) { - auto s = LiteralStringRef( "\r\n" ); - write( s.begin(), s.size() ); + MutexHolder hold(mutex); - auto f = flush(); + // Write remaining contents + auto a = new WriterThread::WriteBuffer( std::move(eventBuffer) ); + loggedLength += bufferLength; + eventBuffer = std::vector(); + bufferLength = 0; + writer->post( a ); - // If we are using a writer thread, try to wait for it to finish its work queue before killing it - // If it's encountering errors, we'll get past here without it actually finishing - if (writer) f.getBlocking(); + auto c = new WriterThread::Close(); + writer->post( c ); + + ThreadFuture f(new ThreadSingleAssignmentVar); + barriers->push(f); + writer->post( new WriterThread::Barrier ); + + f.getBlocking(); opened = false; } @@ -507,7 +436,7 @@ struct TraceLog { ~TraceLog() { close(); - if (writer) writer->addref(); // FIXME: We are not shutting down the writer thread at all, because the ThreadPool shutdown mechanism is blocking (necessarily waits for current work items to finish) and we might not be able to finish everything. Also we (already) weren't closing the file on shutdown anyway. + if (writer) writer->addref(); // FIXME: We are not shutting down the writer thread at all, because the ThreadPool shutdown mechanism is blocking (necessarily waits for current work items to finish) and we might not be able to finish everything. 
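When the log rolls, flush() above re-emits each cached "latest" event into the new file with its Time field rewritten to the roll time, the old value preserved as OriginalTime, and TrackLatestType flipped from "Original" to "Rolled". A small sketch of that field rewrite, using a plain vector of string pairs in place of TraceEventFields:

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

using Fields = std::vector<std::pair<std::string, std::string>>;

Fields rollFields(const Fields& original, double rollTime) {
    Fields rolled;
    for (auto const& [key, value] : original) {
        if (key == "Time") {
            char buf[32];
            snprintf(buf, sizeof(buf), "%.6f", rollTime);   // same formatting as the patch
            rolled.emplace_back("Time", std::string(buf));
            rolled.emplace_back("OriginalTime", value);     // keep the original timestamp
        } else if (key == "TrackLatestType") {
            rolled.emplace_back("TrackLatestType", "Rolled");
        } else {
            rolled.emplace_back(key, value);
        }
    }
    return rolled;
}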
} }; @@ -520,7 +449,7 @@ NetworkAddress getAddressIndex() { } // This does not check for simulation, and as such is not safe for external callers -void clearPrefix_internal( std::map& data, std::string prefix ) { +void clearPrefix_internal( std::map& data, std::string prefix ) { auto first = data.lower_bound( prefix ); auto last = data.lower_bound( strinc( prefix ).toString() ); data.erase( first, last ); @@ -534,29 +463,29 @@ void LatestEventCache::clear() { latest[getAddressIndex()].clear(); } -void LatestEventCache::set( std::string tag, std::string contents ) { +void LatestEventCache::set( std::string tag, const TraceEventFields& contents ) { latest[getAddressIndex()][tag] = contents; } -std::string LatestEventCache::get( std::string tag ) { +TraceEventFields LatestEventCache::get( std::string tag ) { return latest[getAddressIndex()][tag]; } -std::vector allEvents( std::map const& data ) { - std::vector all; +std::vector allEvents( std::map const& data ) { + std::vector all; for(auto it = data.begin(); it != data.end(); it++) { all.push_back( it->second ); } return all; } -std::vector LatestEventCache::getAll() { +std::vector LatestEventCache::getAll() { return allEvents( latest[getAddressIndex()] ); } // if in simulation, all events from all machines will be returned -std::vector LatestEventCache::getAllUnsafe() { - std::vector all; +std::vector LatestEventCache::getAllUnsafe() { + std::vector all; for(auto it = latest.begin(); it != latest.end(); ++it) { auto m = allEvents( it->second ); all.insert( all.end(), m.begin(), m.end() ); @@ -564,13 +493,13 @@ std::vector LatestEventCache::getAllUnsafe() { return all; } -void LatestEventCache::setLatestError( std::string contents ) { +void LatestEventCache::setLatestError( const TraceEventFields& contents ) { if(TraceEvent::isNetworkThread()) { // The latest event cache doesn't track errors that happen on other threads latestErrors[getAddressIndex()] = contents; } } -std::string LatestEventCache::getLatestError() { +TraceEventFields LatestEventCache::getLatestError() { return latestErrors[getAddressIndex()]; } @@ -601,8 +530,7 @@ void openTraceFile(const NetworkAddress& na, uint64_t rollsize, uint64_t maxLogs baseOfBase = "trace"; std::string baseName = format("%s.%03d.%03d.%03d.%03d.%d", baseOfBase.c_str(), (na.ip>>24)&0xff, (na.ip>>16)&0xff, (na.ip>>8)&0xff, na.ip&0xff, na.port); - g_traceLog.logGroup = logGroup; - g_traceLog.open( directory, baseName, format("%lld", time(NULL)), rollsize, maxLogsSize, na ); + g_traceLog.open( directory, baseName, logGroup, format("%lld", time(NULL)), rollsize, maxLogsSize, !g_network->isSimulated() ? na : Optional()); // FIXME suppress.insert( LiteralStringRef( "TLogCommitDurable" ) ); @@ -632,7 +560,6 @@ bool traceFileIsOpen() { bool TraceEvent::isEnabled( const char* type, Severity severity ) { //if (!g_traceLog.isOpen()) return false; - if( !g_network ) return false; if( severity < FLOW_KNOBS->MIN_TRACE_SEVERITY) return false; StringRef s( (const uint8_t*)type, strlen(type) ); return !suppress.count(s); @@ -697,21 +624,47 @@ bool TraceEvent::init( Severity severity, const char* type ) { this->type = type; this->severity = severity; - tmpEventMetric = new DynamicEventMetric(MetricNameRef()); - tmpEventMetric->setField("Severity", (int64_t)severity); - validateFieldName(type, type, true); - length = 0; - if (isEnabled(type, severity)) { - enabled = true; - buffer[sizeof(buffer)-1]=0; - NetworkAddress local = g_network->isSimulated() ? 
g_network->getLocalAddress() : g_traceLog.localAddress; - double time = g_trace_clock == TRACE_CLOCK_NOW ? now() : timer(); - writef( ">24)&0xff,(local.ip>>16)&0xff,(local.ip>>8)&0xff,local.ip&0xff,local.port ); - } else - enabled = false; + enabled = isEnabled(type, severity); + + // Backstop to throttle very spammy trace events + if (enabled && g_network && !g_network->isSimulated() && severity > SevDebug && isNetworkThread()) { + if (traceEventThrottlerCache->isAboveThreshold(StringRef((uint8_t*)type, strlen(type)))) { + enabled = false; + TraceEvent(SevWarnAlways, std::string(TRACE_EVENT_THROTTLE_STARTING_TYPE).append(type).c_str()).suppressFor(5); + } + else { + traceEventThrottlerCache->addAndExpire(StringRef((uint8_t*)type, strlen(type)), 1, now() + FLOW_KNOBS->TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY); + } + } + + if(enabled) { + tmpEventMetric = new DynamicEventMetric(MetricNameRef()); + + double time; + if(g_trace_clock == TRACE_CLOCK_NOW) { + if(!g_network) { + static double preNetworkTime = timer_monotonic(); + time = preNetworkTime; + } + else { + time = now(); + } + } + else { + time = timer(); + } + + detail("Severity", severity); + detailf("Time", "%.6f", time); + detail("Type", type); + if(g_network && g_network->isSimulated()) { + NetworkAddress local = g_network->getLocalAddress(); + detailf("Machine", "%d.%d.%d.%d:%d", (local.ip>>24)&0xff, (local.ip>>16)&0xff, (local.ip>>8)&0xff, local.ip&0xff, local.port); + } + } else { + tmpEventMetric = nullptr; + } return enabled; } @@ -734,108 +687,99 @@ TraceEvent& TraceEvent::error(class Error const& error, bool includeCancelled) { return *this; } -TraceEvent& TraceEvent::detail( const char* key, const char* value ) { - validateFieldName(key, type); +TraceEvent& TraceEvent::detailImpl( std::string&& key, std::string&& value, bool writeEventMetricField) { if (enabled) { - if( strlen( value ) > 495 ) { - char replacement[500]; - strncpy( replacement, value, 495 ); - strcpy( &replacement[495], "\\..." 
); - value = replacement; + if( value.size() > 495 ) { + value = value.substr(0, 495) + "..."; + } + + if(writeEventMetricField) { + tmpEventMetric->setField(key.c_str(), Standalone(StringRef(value))); + } + + fields.addField(std::move(key), std::move(value)); + + if(fields.sizeBytes() > FLOW_KNOBS->TRACE_EVENT_MAX_SIZE) { + TraceEvent(SevError, "TraceEventOverflow").detail("TraceFirstBytes", fields.toString().substr(300)); + enabled = false; } - writef( " %s=\"", key ); - writeEscaped( value ); - writef( "\"" ); - tmpEventMetric->setField(key, Standalone(StringRef((const uint8_t *)value, strlen(value)))); } return *this; } -TraceEvent& TraceEvent::detail( const char* key, const std::string& value ) { - return detail( key, value.c_str() ); + +TraceEvent& TraceEvent::detail( std::string key, std::string value ) { + return detailImpl(std::move(key), std::move(value)); } -TraceEvent& TraceEvent::detail( const char* key, double value ) { +TraceEvent& TraceEvent::detail( std::string key, double value ) { if(enabled) - tmpEventMetric->setField(key, value); - return _detailf( key, "%g", value ); + tmpEventMetric->setField(key.c_str(), value); + return detailfNoMetric( std::move(key), "%g", value ); } -TraceEvent& TraceEvent::detail( const char* key, int value ) { +TraceEvent& TraceEvent::detail( std::string key, int value ) { if(enabled) - tmpEventMetric->setField(key, (int64_t)value); - return _detailf( key, "%d", value ); + tmpEventMetric->setField(key.c_str(), (int64_t)value); + return detailfNoMetric( std::move(key), "%d", value ); } -TraceEvent& TraceEvent::detail( const char* key, unsigned value ) { +TraceEvent& TraceEvent::detail( std::string key, unsigned value ) { if(enabled) - tmpEventMetric->setField(key, (int64_t)value); - return _detailf( key, "%u", value ); + tmpEventMetric->setField(key.c_str(), (int64_t)value); + return detailfNoMetric( std::move(key), "%u", value ); } -TraceEvent& TraceEvent::detail( const char* key, long int value ) { +TraceEvent& TraceEvent::detail( std::string key, long int value ) { if(enabled) - tmpEventMetric->setField(key, (int64_t)value); - return _detailf( key, "%ld", value ); + tmpEventMetric->setField(key.c_str(), (int64_t)value); + return detailfNoMetric( std::move(key), "%ld", value ); } -TraceEvent& TraceEvent::detail( const char* key, long unsigned int value ) { +TraceEvent& TraceEvent::detail( std::string key, long unsigned int value ) { if(enabled) - tmpEventMetric->setField(key, (int64_t)value); - return _detailf( key, "%lu", value ); + tmpEventMetric->setField(key.c_str(), (int64_t)value); + return detailfNoMetric( std::move(key), "%lu", value ); } -TraceEvent& TraceEvent::detail( const char* key, long long int value ) { +TraceEvent& TraceEvent::detail( std::string key, long long int value ) { if(enabled) - tmpEventMetric->setField(key, (int64_t)value); - return _detailf( key, "%lld", value ); + tmpEventMetric->setField(key.c_str(), (int64_t)value); + return detailfNoMetric( std::move(key), "%lld", value ); } -TraceEvent& TraceEvent::detail( const char* key, long long unsigned int value ) { +TraceEvent& TraceEvent::detail( std::string key, long long unsigned int value ) { if(enabled) - tmpEventMetric->setField(key, (int64_t)value); - return _detailf( key, "%llu", value ); + tmpEventMetric->setField(key.c_str(), (int64_t)value); + return detailfNoMetric( std::move(key), "%llu", value ); } -TraceEvent& TraceEvent::detail( const char* key, NetworkAddress const& value ) { - return detail( key, value.toString() ); +TraceEvent& TraceEvent::detail( 
std::string key, const NetworkAddress& value ) { + return detailImpl( std::move(key), value.toString() ); } -TraceEvent& TraceEvent::detail( const char* key, UID const& value ) { - return detailf( key, "%016llx", value.first() ); // SOMEDAY: Log entire value? We also do this explicitly in some "lists" in various individual TraceEvent calls +TraceEvent& TraceEvent::detail( std::string key, const UID& value ) { + return detailf( std::move(key), "%016llx", value.first() ); // SOMEDAY: Log entire value? We also do this explicitly in some "lists" in various individual TraceEvent calls } -TraceEvent& TraceEvent::detailext(const char* key, StringRef const& value) { - return detail(key, value.printable()); +TraceEvent& TraceEvent::detailext( std::string key, StringRef const& value ) { + return detailImpl(std::move(key), value.printable()); } -TraceEvent& TraceEvent::detailext(const char* key, Optional> const& value) { - return detail(key, (value.present()) ? value.get().printable() : "[not set]"); +TraceEvent& TraceEvent::detailext( std::string key, const Optional>& value ) { + return detailImpl(std::move(key), (value.present()) ? value.get().printable() : "[not set]"); } -TraceEvent& TraceEvent::detailf( const char* key, const char* valueFormat, ... ) { +TraceEvent& TraceEvent::detailf( std::string key, const char* valueFormat, ... ) { if (enabled) { va_list args; va_start(args, valueFormat); - detailfv( key, valueFormat, args, true); // Write this detail to eventMetric + std::string value; + int result = vsformat(value, valueFormat, args); va_end(args); + + ASSERT(result >= 0); + detailImpl(std::move(key), std::move(value)); } return *this; } -TraceEvent& TraceEvent::_detailf( const char* key, const char* valueFormat, ... ) { +TraceEvent& TraceEvent::detailfNoMetric( std::string&& key, const char* valueFormat, ... 
) { if (enabled) { va_list args; va_start(args, valueFormat); - detailfv( key, valueFormat, args, false); // Do NOT write this detail to the event metric, caller of _detailf should do that itself with the appropriate value type + std::string value; + int result = vsformat(value, valueFormat, args); va_end(args); - } - return *this; -} -TraceEvent& TraceEvent::detailfv( const char* key, const char* valueFormat, va_list args, bool writeEventMetricField ) { - validateFieldName(key, type); - if (enabled) { - writef( " %s=\"", key ); - va_list args2; - va_copy(args2, args); - writeEscapedfv( valueFormat, args ); - writef( "\"" ); - if(writeEventMetricField) - { - // TODO: It would be better to make use of the formatted string created in writeEscapedfv above - char temp[ 1024 ]; - size_t n = vsnprintf(temp, sizeof(temp)-1, valueFormat, args2); - if(n > sizeof(temp) - 1) - n = sizeof(temp) - 1; - temp[n] = 0; - tmpEventMetric->setField(key, Standalone(StringRef((uint8_t *)temp, n))); - } + + ASSERT(result >= 0); + detailImpl(std::move(key), std::move(value), false); // Do NOT write this detail to the event metric, caller of detailfNoMetric should do that itself with the appropriate value type } return *this; } @@ -895,50 +839,24 @@ unsigned long TraceEvent::CountEventsLoggedAt(Severity sev) { return TraceEvent::eventCounts[sev/10]; } -TraceEvent& TraceEvent::backtrace(std::string prefix) { +TraceEvent& TraceEvent::backtrace(const std::string& prefix) { if (this->severity == SevError) return *this; // We'll backtrace this later in ~TraceEvent - return detail((prefix + "Backtrace").c_str(), platform::get_backtrace()); + return detail(prefix + "Backtrace", platform::get_backtrace()); } TraceEvent::~TraceEvent() { try { if (enabled) { - // TRACE_EVENT_THROTTLER - if (!g_network->isSimulated() && severity > SevDebug && isNetworkThread()) { - if (traceEventThrottlerCache->isAboveThreshold(StringRef((uint8_t *)type, strlen(type)))) { - TraceEvent(SevWarnAlways, std::string(TRACE_EVENT_THROTTLE_STARTING_TYPE).append(type).c_str()).suppressFor(5); - // Throttle Msg - delete tmpEventMetric; - return; - } - else { - traceEventThrottlerCache->addAndExpire(StringRef((uint8_t *)type, strlen(type)), 1, now() + FLOW_KNOBS->TRACE_EVENT_THROTLLER_SAMPLE_EXPIRY); - } - } // End of Throttler - - _detailf("LogGroup", "%.*s", g_traceLog.logGroup.size(), g_traceLog.logGroup.data()); - if (!trackingKey.empty()) { - if(!isNetworkThread()) { - TraceEvent(SevError, "TrackLatestFromNonNetworkThread"); - detail("__InvalidTrackLatest__", ""); // Choosing a detail name that is unlikely to collide with other names - } - else { - latestEventCache.set( trackingKey, std::string(buffer, length) + " TrackLatestType=\"Rolled\"/>\r\n" ); - } - - detail("TrackLatestType", "Original"); - } - if (this->severity == SevError) { severity = SevInfo; backtrace(); severity = SevError; } - if (g_traceLog.isOpen()) { - writef("/>\r\n"); - g_traceLog.write( buffer, length ); - TraceEvent::eventCounts[severity/10]++; + TraceEvent::eventCounts[severity/10]++; + g_traceLog.writeEvent( fields, trackingKey, severity > SevWarnAlways ); + + if (g_traceLog.isOpen()) { // Log Metrics if(g_traceLog.logTraceEventMetrics && isNetworkThread()) { // Get the persistent Event Metric representing this trace event and push the fields (details) accumulated in *this to it and then log() it. 
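Earlier in this file the spam backstop moved from the destructor into TraceEvent::init(), so a throttled event type is disabled before any details are formatted. The sketch below is a heavily simplified, std-only view of that check; the real code uses TransientThresholdMetricSample with the TRACE_EVENT_THROTTLER_MSG_LIMIT and TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY knobs, and the expiry behaviour is omitted here.

#include <map>
#include <string>

struct EventThrottler {
    long msgLimit = 20000;              // TRACE_EVENT_THROTTLER_MSG_LIMIT default
    std::map<std::string, long> counts; // per-type counts; the real sample also expires entries

    // Returns false if this event type has become too spammy and should be dropped.
    // On the first drop the caller emits a one-off "TraceEventThrottle_<type>" warning
    // and marks the event disabled, exactly as init() does above.
    bool admit(const std::string& type) {
        long& c = counts[type];
        if (c >= msgLimit)
            return false;
        ++c;
        return true;
    }
};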
@@ -950,74 +868,13 @@ TraceEvent::~TraceEvent() { g_traceLog.log(severity, type, id, event_ts); } } - if (severity > SevWarnAlways) { - latestEventCache.setLatestError( std::string(buffer, length) + " LatestError=\"1\"/>\r\n" ); - } } } catch( Error &e ) { TraceEvent(SevError, "TraceEventDestructorError").error(e,true); - delete tmpEventMetric; - ASSERT_ABORT(false); } delete tmpEventMetric; } -void TraceEvent::write( int length, const void* bytes ) { - if (!(this->length + length <= sizeof(buffer))) { - buffer[300] = 0; - TraceEvent(SevError, "TraceEventOverflow").detail("TraceFirstBytes", buffer); - enabled = false; - } else { - memcpy( buffer + this->length, bytes, length ); - this->length += length; - } -} - -void TraceEvent::writef( const char* format, ... ) { - va_list args; - va_start( args, format ); - int size = sizeof(buffer)-1-length; - int i = size<0 ? -1 : vsnprintf( buffer + length, size, format, args ); - if( i < 0 || i >= size ) { // first block catches truncations on windows, second on linux - buffer[300] = 0; - TraceEvent(SevError, "TraceEventOverflow").detail("TraceFirstBytes", buffer); - enabled = false; - } else { - length += i; - } - va_end(args); -} - -void TraceEvent::writeEscaped( const char* data ) { - while (*data) { - if (*data == '&') { - write( 5, "&" ); - data++; - } else if (*data == '"') { - write( 6, """ ); - data++; - } else if (*data == '<') { - write( 4, "<" ); - data++; - } else if (*data == '>') { - write( 4, ">" ); - data++; - } else { - const char* e = data; - while (*e && *e != '"' && *e != '&' && *e != '<' && *e != '>') e++; - write( e-data, data ); - data = e; - } - } -} - -void TraceEvent::writeEscapedfv( const char* format, va_list args ) { - char temp[ 1024 ]; - vsnprintf(temp, sizeof(temp)-1, format, args); - temp[sizeof(temp)-1] = 0; - writeEscaped( temp ); -} - thread_local bool TraceEvent::networkThread = false; void TraceEvent::setNetworkThread() { @@ -1060,30 +917,31 @@ void TraceBatch::addBuggify( int activated, int line, std::string file ) { void TraceBatch::dump() { if (!g_traceLog.isOpen()) return; - NetworkAddress local = g_network->getLocalAddress(); + std::string machine; + if(g_network->isSimulated()) { + NetworkAddress local = g_network->getLocalAddress(); + machine = format("%d.%d.%d.%d:%d", (local.ip>>24)&0xff,(local.ip>>16)&0xff,(local.ip>>8)&0xff,local.ip&0xff,local.port); + } for(int i = 0; i < attachBatch.size(); i++) { - char buffer[256]; - int length = sprintf(buffer, "\r\n", - (int)SevInfo, attachBatch[i].time, attachBatch[i].name, (local.ip>>24)&0xff,(local.ip>>16)&0xff,(local.ip>>8)&0xff,local.ip&0xff,local.port, (int)g_traceLog.logGroup.size(), g_traceLog.logGroup.data(), - attachBatch[i].id, attachBatch[i].to); - g_traceLog.write( buffer, length ); + if(g_network->isSimulated()) { + attachBatch[i].fields.addField("Machine", machine); + } + g_traceLog.writeEvent(attachBatch[i].fields, "", false); } for(int i = 0; i < eventBatch.size(); i++) { - char buffer[256]; - int length = sprintf(buffer, "\r\n", - (int)SevInfo, eventBatch[i].time, eventBatch[i].name, (local.ip>>24)&0xff,(local.ip>>16)&0xff,(local.ip>>8)&0xff,local.ip&0xff,local.port, (int)g_traceLog.logGroup.size(), g_traceLog.logGroup.data(), - eventBatch[i].id, eventBatch[i].location ); - g_traceLog.write( buffer, length ); + if(g_network->isSimulated()) { + eventBatch[i].fields.addField("Machine", machine); + } + g_traceLog.writeEvent(eventBatch[i].fields, "", false); } for(int i = 0; i < buggifyBatch.size(); i++) { - char buffer[256]; - int length = sprintf( 
buffer, "\r\n", - (int)SevInfo, buggifyBatch[i].time, (local.ip>>24)&0xff,(local.ip>>16)&0xff,(local.ip>>8)&0xff,local.ip&0xff,local.port, (int)g_traceLog.logGroup.size(), g_traceLog.logGroup.data(), - buggifyBatch[i].activated, buggifyBatch[i].file.c_str(), buggifyBatch[i].line ); - g_traceLog.write( buffer, length ); + if(g_network->isSimulated()) { + buggifyBatch[i].fields.addField("Machine", machine); + } + g_traceLog.writeEvent(buggifyBatch[i].fields, "", false); } g_traceLog.flush(); @@ -1091,3 +949,127 @@ void TraceBatch::dump() { attachBatch.clear(); buggifyBatch.clear(); } + +TraceBatch::EventInfo::EventInfo(double time, const char *name, uint64_t id, const char *location) { + fields.addField("Severity", format("%d", (int)SevInfo)); + fields.addField("Time", format("%.6f", time)); + fields.addField("Type", name); + fields.addField("ID", format("%016" PRIx64, id)); + fields.addField("Location", location); +} + +TraceBatch::AttachInfo::AttachInfo(double time, const char *name, uint64_t id, uint64_t to) { + fields.addField("Severity", format("%d", (int)SevInfo)); + fields.addField("Time", format("%.6f", time)); + fields.addField("Type", name); + fields.addField("ID", format("%016" PRIx64, id)); + fields.addField("To", format("%016" PRIx64, to)); +} + +TraceBatch::BuggifyInfo::BuggifyInfo(double time, int activated, int line, std::string file) { + fields.addField("Severity", format("%d", (int)SevInfo)); + fields.addField("Time", format("%.6f", time)); + fields.addField("Type", "BuggifySection"); + fields.addField("Activated", format("%d", activated)); + fields.addField("File", std::move(file)); + fields.addField("Line", format("%d", line)); +} + +TraceEventFields::TraceEventFields() : bytes(0) {} + +void TraceEventFields::addField(const std::string& key, const std::string& value) { + bytes += key.size() + value.size(); + fields.push_back(std::make_pair(key, value)); +} + +void TraceEventFields::addField(std::string&& key, std::string&& value) { + bytes += key.size() + value.size(); + fields.push_back(std::make_pair(std::move(key), std::move(value))); +} + +size_t TraceEventFields::size() const { + return fields.size(); +} + +size_t TraceEventFields::sizeBytes() const { + return bytes; +} + +TraceEventFields::FieldIterator TraceEventFields::begin() const { + return fields.cbegin(); +} + +TraceEventFields::FieldIterator TraceEventFields::end() const { + return fields.cend(); +} + +const TraceEventFields::Field &TraceEventFields::operator[] (int index) const { + ASSERT(index >= 0 && index < size()); + return fields.at(index); +} + +bool TraceEventFields::tryGetValue(std::string key, std::string &outValue) const { + for(auto itr = begin(); itr != end(); ++itr) { + if(itr->first == key) { + outValue = itr->second; + return true; + } + } + + return false; +} + +std::string TraceEventFields::getValue(std::string key) const { + std::string value; + if(tryGetValue(key, value)) { + return value; + } + else { + throw attribute_not_found(); + } +} + +std::string TraceEventFields::toString() const { + std::string str; + bool first = true; + for(auto itr = begin(); itr != end(); ++itr) { + if(!first) { + str += ", "; + } + first = false; + + str += format("\"%s\"=\"%s\"", itr->first.c_str(), itr->second.c_str()); + } + + return str; +} + +bool validateField(const char *key, bool allowUnderscores) { + if((key[0] < 'A' || key[0] > 'Z') && key[0] != '_') { + return false; + } + + const char* underscore = strchr(key, '_'); + while(underscore) { + if(!allowUnderscores || ((underscore[1] < 'A' || 
underscore[1] > 'Z') && key[0] != '_' && key[0] != '\0')) { + return false; + } + + underscore = strchr(&underscore[1], '_'); + } + + return true; +} + +void TraceEventFields::validateFormat() const { + if(g_network && g_network->isSimulated()) { + for(Field field : fields) { + if(!validateField(field.first.c_str(), false)) { + fprintf(stderr, "Trace event detail name `%s' is invalid in:\n\t%s\n", field.first.c_str(), toString().c_str()); + } + if(field.first == "Type" && !validateField(field.second.c_str(), true)) { + fprintf(stderr, "Trace event detail Type `%s' is invalid\n", field.second.c_str()); + } + } + } +} diff --git a/flow/Trace.h b/flow/Trace.h index 9ab0a63360..c055ccaba1 100644 --- a/flow/Trace.h +++ b/flow/Trace.h @@ -52,6 +52,55 @@ enum Severity { SevMax=1000000 }; +class TraceEventFields { +public: + typedef std::pair Field; + typedef std::vector FieldContainer; + typedef FieldContainer::const_iterator FieldIterator; + + TraceEventFields(); + + size_t size() const; + size_t sizeBytes() const; + FieldIterator begin() const; + FieldIterator end() const; + + void addField(const std::string& key, const std::string& value); + void addField(std::string&& key, std::string&& value); + + const Field &operator[] (int index) const; + bool tryGetValue(std::string key, std::string &outValue) const; + std::string getValue(std::string key) const; + + std::string toString() const; + void validateFormat() const; + +private: + FieldContainer fields; + size_t bytes; +}; + +template +inline void load( Archive& ar, TraceEventFields& value ) { + uint32_t count; + ar >> count; + + std::string k; + std::string v; + for(uint32_t i = 0; i < count; ++i) { + ar >> k >> v; + value.addField(k, v); + } +} +template +inline void save( Archive& ar, const TraceEventFields& value ) { + ar << (uint32_t)value.size(); + + for(auto itr : value) { + ar << itr.first << itr.second; + } +} + class TraceBatch { public: void addEvent( const char *name, uint64_t id, const char *location ); @@ -61,30 +110,18 @@ public: private: struct EventInfo { - double time; - const char *name; - uint64_t id; - const char *location; - - EventInfo(double time, const char *name, uint64_t id, const char *location) : time(time), name(name), id(id), location(location) {} + TraceEventFields fields; + EventInfo(double time, const char *name, uint64_t id, const char *location); }; struct AttachInfo { - double time; - const char *name; - uint64_t id; - uint64_t to; - - AttachInfo(double time, const char *name, uint64_t id, uint64_t to) : time(time), name(name), id(id), to(to) {} + TraceEventFields fields; + AttachInfo(double time, const char *name, uint64_t id, uint64_t to); }; struct BuggifyInfo { - double time; - int activated; - int line; - std::string file; - - BuggifyInfo(double time, int activated, int line, std::string file) : time(time), activated(activated), line(line), file(file) {} + TraceEventFields fields; + BuggifyInfo(double time, int activated, int line, std::string file); }; std::vector eventBatch; @@ -97,7 +134,6 @@ class StringRef; template class Standalone; template class Optional; -#if 1 struct TraceEvent { TraceEvent( const char* type, UID id = UID() ); // Assumes SevInfo severity TraceEvent( Severity, const char* type, UID id = UID() ); @@ -112,29 +148,28 @@ struct TraceEvent { static void setNetworkThread(); static bool isNetworkThread(); - TraceEvent& error(class Error const& e, bool includeCancelled=false); + TraceEvent& error(const class Error& e, bool includeCancelled=false); - TraceEvent& detail( const char* key, 
const char* value ); - TraceEvent& detail( const char* key, const std::string& value ); - TraceEvent& detail( const char* key, double value ); - TraceEvent& detail( const char* key, long int value ); - TraceEvent& detail( const char* key, long unsigned int value ); - TraceEvent& detail( const char* key, long long int value ); - TraceEvent& detail( const char* key, long long unsigned int value ); - TraceEvent& detail( const char* key, int value ); - TraceEvent& detail( const char* key, unsigned value ); - TraceEvent& detail( const char* key, struct NetworkAddress const& value ); - TraceEvent& detailf( const char* key, const char* valueFormat, ... ); - TraceEvent& detailext(const char* key, StringRef const& value); - TraceEvent& detailext(const char* key, Optional> const& value); + TraceEvent& detail( std::string key, std::string value ); + TraceEvent& detail( std::string key, double value ); + TraceEvent& detail( std::string key, long int value ); + TraceEvent& detail( std::string key, long unsigned int value ); + TraceEvent& detail( std::string key, long long int value ); + TraceEvent& detail( std::string key, long long unsigned int value ); + TraceEvent& detail( std::string key, int value ); + TraceEvent& detail( std::string key, unsigned value ); + TraceEvent& detail( std::string key, const struct NetworkAddress& value ); + TraceEvent& detailf( std::string key, const char* valueFormat, ... ); + TraceEvent& detailext( std::string key, const StringRef& value ); + TraceEvent& detailext( std::string key, const Optional>& value ); private: - // Private version of _detailf that does NOT write to the eventMetric. This is to be used by other detail methods + // Private version of detailf that does NOT write to the eventMetric. This is to be used by other detail methods // which can write field metrics of a more appropriate type than string but use detailf() to add to the TraceEvent. - TraceEvent& _detailf( const char* key, const char* valueFormat, ... ); + TraceEvent& detailfNoMetric( std::string&& key, const char* valueFormat, ... ); + TraceEvent& detailImpl( std::string&& key, std::string&& value, bool writeEventMetricField=true ); public: - TraceEvent& detailfv( const char* key, const char* valueFormat, va_list args, bool writeEventMetricField); - TraceEvent& detail( const char* key, UID const& value ); - TraceEvent& backtrace(std::string prefix = ""); + TraceEvent& detail( std::string key, const UID& value ); + TraceEvent& backtrace(const std::string& prefix = ""); TraceEvent& trackLatest( const char* trackingKey ); TraceEvent& sample( double sampleRate, bool logSampleRate=true ); TraceEvent& suppressFor( double duration, bool logSuppressedEventCount=true ); @@ -151,7 +186,7 @@ public: private: bool enabled; std::string trackingKey; - char buffer[4000]; + TraceEventFields fields; int length; Severity severity; const char *type; @@ -162,45 +197,28 @@ private: bool init( Severity, const char* type ); bool init( Severity, struct TraceInterval& ); - - void write( int length, const void* data ); - void writef( const char* format, ... 
); - void writeEscaped( const char* data ); - void writeEscapedfv( const char* format, va_list args ); }; -#else -struct TraceEvent { - TraceEvent(const char* type, UID id = UID()) {} - TraceEvent(Severity, const char* type, UID id = UID()) {} - TraceEvent(struct TraceInterval&, UID id = UID()) {} - TraceEvent(const char* type, StringRef& const id); {} // Assumes SevInfo severity - TraceEvent(Severity, const char* type, StringRef& const id); {} - static bool isEnabled(const char* type) { return false; } +struct ITraceLogWriter { + virtual void open() = 0; + virtual void roll() = 0; + virtual void close() = 0; + virtual void write(const std::string&) = 0; + virtual void sync() = 0; - TraceEvent& error(class Error const& e, bool includeCancelled = false) { return *this; } - - TraceEvent& detail(const char* key, const char* value) { return *this; } - TraceEvent& detail(const char* key, const std::string& value) { return *this; } - TraceEvent& detail(const char* key, double value) { return *this; } - TraceEvent& detail(const char* key, long int value) { return *this; } - TraceEvent& detail(const char* key, long unsigned int value) { return *this; } - TraceEvent& detail(const char* key, long long int value) { return *this; } - TraceEvent& detail(const char* key, long long unsigned int value) { return *this; } - TraceEvent& detail(const char* key, int value) { return *this; } - TraceEvent& detail(const char* key, unsigned value) { return *this; } - TraceEvent& detail(const char* key, struct NetworkAddress const& value) { return *this; } - TraceEvent& detailf(const char* key, const char* valueFormat, ...) { return *this; } - TraceEvent& detailfv(const char* key, const char* valueFormat, va_list args) { return *this; } - TraceEvent& detail(const char* key, UID const& value) { return *this; } - TraceEvent& detailext(const char* key, StringRef const& value) { return *this; } - TraceEvent& detailext(const char* key, Optional> const& value); { return *this; } - TraceEvent& backtrace(std::string prefix = "") { return *this; } - TraceEvent& trackLatest(const char* trackingKey) { return *this; } - - TraceEvent& GetLastError() { return *this; } + virtual void addref() = 0; + virtual void delref() = 0; +}; + +struct ITraceLogFormatter { + virtual const char* getExtension() = 0; + virtual const char* getHeader() = 0; // Called when starting a new file + virtual const char* getFooter() = 0; // Called when ending a file + virtual std::string formatEvent(const TraceEventFields&) = 0; // Called for each event + + virtual void addref() = 0; + virtual void delref() = 0; }; -#endif struct TraceInterval { TraceInterval( const char* type ) : count(-1), type(type), severity(SevInfo) {} @@ -216,20 +234,20 @@ struct TraceInterval { struct LatestEventCache { public: - void set( std::string tag, std::string contents ); - std::string get( std::string tag ); - std::vector getAll(); - std::vector getAllUnsafe(); + void set( std::string tag, const TraceEventFields& fields ); + TraceEventFields get( std::string tag ); + std::vector getAll(); + std::vector getAllUnsafe(); void clear( std::string prefix ); void clear(); // Latest error tracking only tracks errors when called from the main thread. Other errors are silently ignored. 
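The ITraceLogFormatter interface introduced in this header is what lets the writer thread stay format-agnostic: only the extension, header/footer, and per-event rendering differ between output formats. This patch only adds the XML formatter; the JSON formatter below is a hypothetical second implementation, shown to illustrate the abstraction, and it assumes the TraceEventFields and ReferenceCounted declarations from this patch. Quote escaping and the trailing comma are glossed over; a real formatter would handle both.

#include <sstream>
#include <string>

#include "FastRef.h"
#include "Trace.h"

struct JsonTraceLogFormatter : public ITraceLogFormatter, ReferenceCounted<JsonTraceLogFormatter> {
    void addref() override { ReferenceCounted<JsonTraceLogFormatter>::addref(); }
    void delref() override { ReferenceCounted<JsonTraceLogFormatter>::delref(); }

    const char* getExtension() override { return "json"; }
    const char* getHeader() override { return "[\r\n"; }   // opens an array of events
    const char* getFooter() override { return "]\r\n"; }

    std::string formatEvent(const TraceEventFields& fields) override {
        std::stringstream ss;
        ss << "{ ";
        bool first = true;
        for (auto itr = fields.begin(); itr != fields.end(); ++itr) {
            if (!first) ss << ", ";
            first = false;
            ss << "\"" << itr->first << "\": \"" << itr->second << "\"";
        }
        ss << " },\r\n";
        return ss.str();
    }
};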
- void setLatestError( std::string contents ); - std::string getLatestError(); + void setLatestError( const TraceEventFields& contents ); + TraceEventFields getLatestError(); private: - std::map<NetworkAddress, std::map<std::string, std::string>> latest; - std::map<NetworkAddress, std::string> latestErrors; + std::map<NetworkAddress, std::map<std::string, TraceEventFields>> latest; + std::map<NetworkAddress, TraceEventFields> latestErrors; }; extern LatestEventCache latestEventCache; diff --git a/flow/XmlTraceLogFormatter.cpp b/flow/XmlTraceLogFormatter.cpp new file mode 100644 index 0000000000..e1a6412a48 --- /dev/null +++ b/flow/XmlTraceLogFormatter.cpp @@ -0,0 +1,95 @@ +/* + * XmlTraceLogFormatter.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "actorcompiler.h" +#include "XmlTraceLogFormatter.h" + +void XmlTraceLogFormatter::addref() { + ReferenceCounted<XmlTraceLogFormatter>::addref(); +} + +void XmlTraceLogFormatter::delref() { + ReferenceCounted<XmlTraceLogFormatter>::delref(); +} + +const char* XmlTraceLogFormatter::getExtension() { + return "xml"; +} + +const char* XmlTraceLogFormatter::getHeader() { + return "<?xml version=\"1.0\"?>\r\n<Trace>\r\n"; +} + +const char* XmlTraceLogFormatter::getFooter() { + return "</Trace>\r\n"; +} + +void XmlTraceLogFormatter::escape(std::stringstream &ss, std::string source) { + loop { + int index = source.find_first_of(std::string({'&', '"', '<', '>', '\r', '\n', '\0'})); + if(index == source.npos) { + break; + } + + ss << source.substr(0, index); + if(source[index] == '&') { + ss << "&amp;"; + } + else if(source[index] == '"') { + ss << "&quot;"; + } + else if(source[index] == '<') { + ss << "&lt;"; + } + else if(source[index] == '>') { + ss << "&gt;"; + } + else if(source[index] == '\n' || source[index] == '\r') { + ss << " "; + } + else if(source[index] == '\0') { + ss << " "; + TraceEvent(SevWarnAlways, "StrippedIllegalCharacterFromTraceEvent").detail("Source", StringRef(source).printable()).detail("Character", StringRef(source.substr(index, 1)).printable()); + } + else { + ASSERT(false); + } + + source = source.substr(index+1); + } + + ss << source; +} + +std::string XmlTraceLogFormatter::formatEvent(const TraceEventFields &fields) { + std::stringstream ss; + ss << "<Event "; + + for(auto itr : fields) { + escape(ss, itr.first); + ss << "=\""; + escape(ss, itr.second); + ss << "\" "; + } + + ss << "/>\r\n"; + return ss.str(); +} diff --git a/flow/XmlTraceLogFormatter.h b/flow/XmlTraceLogFormatter.h new file mode 100644 index 0000000000..f3ad8be7c0 --- /dev/null +++ b/flow/XmlTraceLogFormatter.h @@ -0,0 +1,43 @@ +/* + * XmlTraceLogFormatter.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef FLOW_XML_TRACE_LOG_FORMATTER_H +#define FLOW_XML_TRACE_LOG_FORMATTER_H +#pragma once + +#include <sstream> + +#include "FastRef.h" +#include "Trace.h" + +struct XmlTraceLogFormatter : public ITraceLogFormatter, ReferenceCounted<XmlTraceLogFormatter> { + void addref(); + void delref(); + + const char* getExtension(); + const char* getHeader(); + const char* getFooter(); + + void escape(std::stringstream &ss, std::string source); + std::string formatEvent(const TraceEventFields &fields); +}; + +#endif diff --git a/flow/actorcompiler/ActorParser.cs b/flow/actorcompiler/ActorParser.cs index c0b374c85f..e4b1391bef 100644 --- a/flow/actorcompiler/ActorParser.cs +++ b/flow/actorcompiler/ActorParser.cs @@ -874,13 +874,13 @@ namespace actorcompiler case "\r\n": LineCount++; break; case "\n": LineCount++; break; } - if (tokens[i].Value.StartsWith("/*")) LineCount += tokens[i].Value.Count(c=>c=='\n'); if (BraceDepth < 0) throw new Error(LineCount, "Mismatched braces"); if (ParenDepth < 0) throw new Error(LineCount, "Mismatched parenthesis"); tokens[i].Position = i; tokens[i].SourceLine = LineCount; tokens[i].BraceDepth = BraceDepth; tokens[i].ParenDepth = ParenDepth; + if (tokens[i].Value.StartsWith("/*")) LineCount += tokens[i].Value.Count(c=>c=='\n'); switch (tokens[i].Value) { case "{": BraceDepth++; if (BraceDepth==1) lastBrace = tokens[i]; break; diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 236b3e4411..87b50367e7 100644 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -118,7 +118,7 @@ ERROR( read_version_already_set, 2010, "Transaction already has a read version s ERROR( version_invalid, 2011, "Version not valid" ) ERROR( range_limits_invalid, 2012, "Range limits not valid" ) ERROR( invalid_database_name, 2013, "Database name must be 'DB'" ) -ERROR( attribute_not_found, 2014, "Attribute not found in string" ) +ERROR( attribute_not_found, 2014, "Attribute not found" ) ERROR( future_not_set, 2015, "Future not ready" ) ERROR( future_not_error, 2016, "Future not an error" ) ERROR( used_during_commit, 2017, "Operation issued while a commit was outstanding" ) diff --git a/flow/flow.cpp b/flow/flow.cpp index 638ec096e1..df2ed38d4f 100644 --- a/flow/flow.cpp +++ b/flow/flow.cpp @@ -93,38 +93,52 @@ Optional<uint64_t> parse_with_suffix(std::string toparse, std::string default_un return ret; } -std::string format( const char* form, ...
) { +int vsformat( std::string &outputString, const char* form, va_list args) { char buf[200]; - va_list args; - va_start(args, form); - int size = vsnprintf(buf, sizeof(buf), form, args); - va_end(args); + va_list args2; + va_copy(args2, args); + int size = vsnprintf(buf, sizeof(buf), form, args2); + va_end(args2); if(size >= 0 && size < sizeof(buf)) { - return std::string(buf, size); + outputString = std::string(buf, size); + return size; } #ifdef _WIN32 // Microsoft's non-standard vsnprintf doesn't return a correct size, but just an error, so determine the necessary size - va_start(args, form); - size = _vscprintf(form, args); - va_end(args); + va_copy(args2, args); + size = _vscprintf(form, args2); + va_end(args2); #endif - if (size < 0) throw internal_error(); + if (size < 0) { + return -1; + } TEST(true); //large format result - std::string s; - s.resize(size + 1); - va_start(args, form); - size = vsnprintf(&s[0], s.size(), form, args); - va_end(args); - if (size < 0 || size >= s.size()) throw internal_error(); + outputString.resize(size + 1); + size = vsnprintf(&outputString[0], outputString.size(), form, args); + if (size < 0 || size >= outputString.size()) { + return -1; + } - s.resize(size); - return s; + outputString.resize(size); + return size; +} + +std::string format( const char* form, ... ) { + va_list args; + va_start(args, form); + + std::string str; + int result = vsformat(str, form, args); + va_end(args); + + ASSERT(result >= 0); + return str; } Standalone<StringRef> strinc(StringRef const& str) { diff --git a/flow/flow.h b/flow/flow.h index dde08547ec..99fa0b2f35 100644 --- a/flow/flow.h +++ b/flow/flow.h @@ -68,6 +68,10 @@ bool validationIsEnabled(); extern Optional<uint64_t> parse_with_suffix(std::string toparse, std::string default_unit = ""); extern std::string format(const char* form, ...); + +// On success, returns the number of characters written. On failure, returns a negative number. +extern int vsformat(std::string &outputString, const char* form, va_list args); + extern Standalone<StringRef> strinc(StringRef const& str); extern StringRef strinc(StringRef const& str, Arena& arena); extern Standalone<StringRef> addVersionStampAtEnd(StringRef const& str); diff --git a/flow/flow.vcxproj b/flow/flow.vcxproj index 5d1b0d8881..b3c3e252c8 100644 --- a/flow/flow.vcxproj +++ b/flow/flow.vcxproj @@ -1,4 +1,4 @@ - + @@ -18,6 +18,10 @@ + + + + diff --git a/flow/flow.vcxproj.filters b/flow/flow.vcxproj.filters index bce9a8d126..875aab1086 100644 --- a/flow/flow.vcxproj.filters +++ b/flow/flow.vcxproj.filters @@ -36,6 +36,8 @@ + + @@ -72,6 +74,8 @@ + + diff --git a/flow/genericactors.actor.h b/flow/genericactors.actor.h index 9c8dbb87bf..5eb31de045 100644 --- a/flow/genericactors.actor.h +++ b/flow/genericactors.actor.h @@ -132,6 +132,18 @@ Future<T> transformErrors( Future<T> f, Error err ) { } } +ACTOR template <class T> +Future<T> transformError( Future<T> f, Error inErr, Error outErr ) { + try { + T t = wait( f ); + return t; + } catch( Error &e ) { + if( e.code() == inErr.code() ) + throw outErr; + throw e; + } +} + // Note that the RequestStream version of forwardPromise doesn't exist, because what to do with errors? ACTOR template <class T> diff --git a/recipes/java-recipes/MicroQueue.java b/recipes/java-recipes/MicroQueue.java index a3a805a067..09f8ebe1a4 100644 --- a/recipes/java-recipes/MicroQueue.java +++ b/recipes/java-recipes/MicroQueue.java @@ -44,21 +44,20 @@ public class MicroQueue { // Remove the top element from the queue.
public static Object dequeue(TransactionContext tcx){ - final KeyValue item = firstItem(tcx); - if(item == null){ - return null; - } - // Remove from the top of the queue. - tcx.run(new Function(){ + return tcx.run(new Function(){ public Void apply(Transaction tr){ + final KeyValue item = firstItem(tr); + if(item == null){ + return null; + } + tr.clear(item.getKey()); - return null; + // Return the old value. + return Tuple.fromBytes(item.getValue()).get(0); } }); - // Return the old value. - return Tuple.fromBytes(item.getValue()).get(0); } // Add an element to the queue. diff --git a/tests/fast/SidebandWithStatus.txt b/tests/fast/SidebandWithStatus.txt index 94f4616d9f..4160e6d3b8 100644 --- a/tests/fast/SidebandWithStatus.txt +++ b/tests/fast/SidebandWithStatus.txt @@ -5,7 +5,7 @@ testTitle=CloggedCausalConsistencyTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error 
messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + 
schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","log_routers":10,"storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} testName=RandomClogging testDuration=30.0 diff --git 
a/tests/rare/LargeApiCorrectnessStatus.txt b/tests/rare/LargeApiCorrectnessStatus.txt index 7db5388026..d1994d1e3c 100644 --- a/tests/rare/LargeApiCorrectnessStatus.txt +++ b/tests/rare/LargeApiCorrectnessStatus.txt @@ -24,4 +24,5 @@ testTitle=ApiCorrectnessTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery 
complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","log_routers":10,"storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} + diff --git a/tests/slow/DDBalanceAndRemoveStatus.txt 
b/tests/slow/DDBalanceAndRemoveStatus.txt index 7e07285c3c..49ba100d77 100644 --- a/tests/slow/DDBalanceAndRemoveStatus.txt +++ b/tests/slow/DDBalanceAndRemoveStatus.txt @@ -43,4 +43,5 @@ testTitle=DDBalance_test testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery 
complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","log_routers":10,"storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} +