From f9c8840fd697551ba0af5f5d1ed986120b35d469 Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Wed, 26 Apr 2023 01:08:16 -0700 Subject: [PATCH 1/2] Initial checkin of RandomKeyValueUtils.h/cpp and a unit test. --- fdbclient/RandomKeyValueUtils.cpp | 81 +++++ .../include/fdbclient/RandomKeyValueUtils.h | 324 ++++++++++++++++++ fdbserver/workloads/UnitTests.actor.cpp | 2 + 3 files changed, 407 insertions(+) create mode 100644 fdbclient/RandomKeyValueUtils.cpp create mode 100644 fdbclient/include/fdbclient/RandomKeyValueUtils.h diff --git a/fdbclient/RandomKeyValueUtils.cpp b/fdbclient/RandomKeyValueUtils.cpp new file mode 100644 index 0000000000..410bee174a --- /dev/null +++ b/fdbclient/RandomKeyValueUtils.cpp @@ -0,0 +1,81 @@ +/* + * RandomKeyValueUtils.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2023 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "fdbclient/RandomKeyValueUtils.h"
+#include "flow/UnitTest.h"
+
+// Prints generator.toString() followed by `count` values drawn from generator.next(), one per line.
+// T only needs to provide toString() and next(); the generator is taken by value.
+template <typename T>
+void printNextN(T generator, int count = 10) {
+	fmt::print("Generating from .next() on {}\n", generator.toString());
+	for (int i = 0; i < count; ++i) {
+		fmt::print(" {}\n", generator.next());
+	}
+	fmt::print("\n");
+}
+
+// Builds each generator type both from constructor arguments and from string definitions and prints samples.
+// Nothing is asserted about the generated values; the output is for manual inspection.
+TEST_CASE("/randomKeyValueUtils/generate") {
+
+	printNextN(RandomIntGenerator(3, 10, false), 5);
+	printNextN(RandomIntGenerator("3..10"), 5);
+	printNextN(RandomIntGenerator("a..z"), 5);
+	// Works in reverse too
+	printNextN(RandomIntGenerator("10..3"), 5);
+	// Skewed low
+	printNextN(RandomIntGenerator("^3..10"), 5);
+	// Skewed high
+	printNextN(RandomIntGenerator("^10..3"), 5);
+	printNextN(RandomIntGenerator("5"), 5);
+
+	printNextN(RandomStringGenerator(RandomIntGenerator(3, 10, false), RandomIntGenerator('d', 'g', false)), 10);
+	printNextN(RandomStringGenerator("3..10", "d..g"), 10);
+	printNextN(RandomStringGenerator("3..10/d..g"), 10);
+	printNextN(RandomStringGenerator("5/a..c"), 5);
+	printNextN(RandomStringGenerator("5/a..a"), 5);
+
+	printNextN(RandomKeySetGenerator("0..5", "3..10/d..g"), 20);
+	// Index generator will use a min of 0 so this is the same as 0:5
+	printNextN(RandomKeySetGenerator("5", "3..10/d..g"), 20);
+
+	printNextN(RandomKeyTupleSetGenerator(
+	               RandomIntGenerator(10),
+	               RandomKeyTupleGenerator(
+	                   { RandomKeySetGenerator(
+	                         RandomIntGenerator(5),
+	                         RandomStringGenerator(RandomIntGenerator(5), RandomIntGenerator('a', 'c', false))),
+	                     RandomKeySetGenerator(RandomIntGenerator(5),
+	                                           RandomStringGenerator(RandomIntGenerator(3, 10, true),
+	                                                                 RandomIntGenerator('d', 'f', false))) })),
+	           10);
+
+	// Same as above in string form
+	printNextN(RandomKeyTupleSetGenerator("10::5::5/a..c,5::^3..10/d..f"), 10);
+
+	// uniform random selection from 1000 pregenerated key tuples. Tuples have 3 parts
+	// len 5 chars a-d with 2 choices
+	// len 10 chars k-t with 10000 choices
+	// len 5-8 chars z-z with 2 choices
+	printNextN(RandomKeyTupleSetGenerator("1000::2::5/a..d,10000::10/k..t,2::5..8/z"), 100);
+
+	printNextN(RandomValueGenerator("10..100/r..z"), 20);
+
+	return Void();
+}
+
+// Empty function called from UnitTestWorkload next to the other forceLink*Tests() functions;
+// presumably it exists so that this file's TEST_CASE is linked into the binary.
+void forceLinkRandomKeyValueUtilsTests() {}
diff --git a/fdbclient/include/fdbclient/RandomKeyValueUtils.h b/fdbclient/include/fdbclient/RandomKeyValueUtils.h
new file mode 100644
index 0000000000..7824d71283
--- /dev/null
+++ b/fdbclient/include/fdbclient/RandomKeyValueUtils.h
@@ -0,0 +1,324 @@
+/*
+ * RandomKeyValueUtils.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2023 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <algorithm>
+#include <cctype>
+#include <set>
+#include <vector>
+
+#include "flow/Arena.h"
+#include "flow/Error.h"
+#include "flow/IRandom.h"
+#include "fdbclient/FDBTypes.h"
+// Random unsigned int generator which generates integers between and including first and last
+// Distribution can be uniform, skewed small, or skewed large
+// String Definition Format: [^]first[..last]
+// last is optional and defaults to first
+// If ^ is present, the generated numbers skew toward first, otherwise are uniform random
+// If either first or last begins with a letter character it will be interpreted as its ASCII byte value.
+struct RandomIntGenerator {
+	enum Skew { LARGE, SMALL, NONE };
+
+	// Inclusive bounds of the generated values. Every constructor leaves min <= max.
+	unsigned int min;
+	unsigned int max;
+	// Most recent value returned by next(); 0 until next() has been called.
+	unsigned int val = 0;
+	// Set when a bound was parsed from a letter so that toString() can print letters back.
+	bool alpha = false;
+	Skew skew = NONE;
+
+	// Parses one bound of a string definition: empty -> 0, leading letter -> that character's byte value
+	// (and sets `alpha`), anything else -> atol() of the text.
+	unsigned int parseInt(StringRef s) {
+		if (s.size() == 0) {
+			return 0;
+		} else if (std::isalpha(s[0])) {
+			alpha = true;
+			return (unsigned int)s[0];
+		} else {
+			return atol(s.toString().c_str());
+		}
+	}
+
+	// Always generates `only`.
+	RandomIntGenerator(unsigned int only = 0) : min(only), max(only) {}
+	// Generates values between first and last inclusive, in either order. When skewTowardFirst is set the
+	// distribution is skewed toward whichever end `first` is.
+	RandomIntGenerator(unsigned int first, unsigned int last, bool skewTowardFirst) : min(first), max(last) {
+		if (first != last && skewTowardFirst) {
+			skew = (first < last) ? SMALL : LARGE;
+		}
+		if (min > max) {
+			std::swap(min, max);
+		}
+	}
+	RandomIntGenerator(const char* cstr) : RandomIntGenerator(std::string(cstr)) {}
+	RandomIntGenerator(std::string str) : RandomIntGenerator(StringRef(str)) {}
+	// Parses the string definition format [^]first[..last]
+	RandomIntGenerator(StringRef str) {
+		bool skewTowardFirst = false;
+		if (!str.empty() && str[0] == '^') {
+			skewTowardFirst = true;
+			str = str.substr(1);
+		}
+
+		StringRef first = str.eat("..");
+		StringRef last = str;
+		if (last.size() == 0) {
+			last = first;
+		}
+
+		min = parseInt(first);
+		max = parseInt(last);
+		// The skew direction must be decided before the bounds are put in order.
+		if (skewTowardFirst && min != max) {
+			skew = (min < max) ? SMALL : LARGE;
+		}
+		if (min > max) {
+			std::swap(min, max);
+		}
+	}
+
+	// Generate and return a random number between min and max inclusive
+	unsigned int next() {
+		switch (skew) {
+		case SMALL:
+			return val = deterministicRandom()->randomSkewedUInt32(min, max + 1);
+		case LARGE:
+			// Mirror the small-skewed draw within the range so that it skews toward max. Assuming
+			// randomSkewedUInt32(a, b) returns a value in [a, b), subtracting the raw draw from max
+			// would give [0, max - min], which falls below min whenever min > 0.
+			return val = max - (deterministicRandom()->randomSkewedUInt32(min, max + 1) - min);
+		case NONE:
+		default:
+			return val = deterministicRandom()->randomInt(min, max + 1);
+		}
+	}
+	// Return the last random number returned by next()
+	unsigned int last() const { return val; }
+
+	// Formats a bound as a character if the definition used letters and x is a letter, otherwise as a number.
+	std::string formatLimit(int x) const {
+		return (alpha && std::isalpha(x)) ? fmt::format("{}", (char)x) : fmt::format("{}", x);
+	}
+
+	// Returns the generator in string definition form. For a skew toward the large end the bounds are
+	// printed as max..min so that the text after ^ starts with the favored end.
+	std::string toString() const {
+		if (min == max) {
+			return fmt::format("{}", min);
+		}
+		if (skew == NONE || skew == SMALL) {
+			return fmt::format("{}{}..{}", (skew == NONE) ? "" : "^", formatLimit(min), formatLimit(max));
+		}
+		ASSERT(skew == LARGE);
+		return fmt::format("^{}..{}", formatLimit(max), formatLimit(min));
+	}
+};
+
+// Random string generator
+// Generates random strings of a random size from a size int generator and made of random chars
+// from a random char int generator
+//
+// String Definition Format: sizeRange[/byteRange]
+// sizeRange and byteRange are RandomIntGenerators
+// The default `byteRange` is 0..255
+struct RandomStringGenerator {
+	RandomStringGenerator() {}
+	RandomStringGenerator(RandomIntGenerator size, RandomIntGenerator byteset) : size(size), bytes(byteset) {}
+	RandomStringGenerator(const char* cstr) : RandomStringGenerator(std::string(cstr)) {}
+	RandomStringGenerator(std::string str) : RandomStringGenerator(StringRef(str)) {}
+	RandomStringGenerator(StringRef str) {
+		StringRef sSize = str.eat("/");
+		StringRef sBytes = str;
+		if (sBytes.size() == 0) {
+			// Must use the RandomIntGenerator range syntax. "0:255" contains no ".." and would be
+			// parsed by atol() as the single value 0.
+			sBytes = "0..255"_sr;
+		}
+		size = RandomIntGenerator(sSize.toString());
+		bytes = RandomIntGenerator(sBytes);
+	}
+
+	RandomIntGenerator size;
+	RandomIntGenerator bytes;
+	// Most recent string returned by next()
+	Standalone<StringRef> val;
+
+	// Generates a new string: the length comes from `size`, each byte from `bytes`.
+	Standalone<StringRef> next() {
+		val = makeString(size.next());
+		uint8_t* out = mutateString(val);
+		for (int i = 0; i < val.size(); ++i) {
+			out[i] = (uint8_t)bytes.next();
+		}
+		return val;
+	}
+
+	// Return the last string returned by next()
+	Standalone<StringRef> last() const { return val; }
+
+	std::string toString() const { return fmt::format("{}/{}", size.toString(), bytes.toString()); }
+};
+
+// Same construction, definition, and usage as RandomStringGenerator but sacrifices randomness
+// and uniqueness for performance.
+// It uses a large pre-generated string and generates random substrings from it.
+struct RandomValueGenerator {
+	// Forwards all arguments to a RandomStringGenerator constructor, then pre-generates the noise block.
+	template <typename... Args>
+	RandomValueGenerator(Args&&... args) : strings(std::forward<Args>(args)...) {
+		// Make a similar RandomStringGenerator to generate the noise block from. The block is at least
+		// as long as the longest value that can be requested so next() always has a valid offset range.
+		noise = RandomStringGenerator(RandomIntGenerator(std::max<int>(2e6, strings.size.max)), strings.bytes).next();
+	}
+
+	RandomStringGenerator strings;
+	Standalone<StringRef> noise;
+	Value val;
+
+	// Returns a substring of the noise block at a random offset, with a length drawn from strings.size.
+	// The returned Value is built from the substring and the noise block's arena.
+	Value next() {
+		int len = strings.size.next();
+		val = Value(noise.substr(deterministicRandom()->randomInt(0, noise.size() - len + 1), len), noise.arena());
+		return val;
+	}
+
+	// Return the last value returned by next()
+	Value last() const { return val; }
+
+	std::string toString() const { return fmt::format("{}", strings.toString()); }
+};
+
+// Base class for randomly generated key sets
+// Returns a random or nearby key at some distance from a vector of keys generated at init time.
+// Requires a RandomIntGenerator as the index generator for selecting which random next key to return. The given index
+// generator should have a min of 0 and if it doesn't its min will be updated to 0.
+struct RandomStringSetGeneratorBase {
+	Arena arena;
+	// The pre-generated unique keys in sorted order, with their memory held by `arena`.
+	std::vector<KeyRef> keys;
+	RandomIntGenerator indexGenerator;
+	// Index into `keys` of the key most recently returned.
+	int iVal = 0;
+	KeyRange rangeVal;
+
+	// Fills `keys` with originalIndexGenerator.max unique keys drawn from keyGen, then converts the index
+	// generator to the 0-based range [0, max - 1]. Asserts if keyGen cannot produce enough unique keys
+	// within 3 * max draws.
+	template <typename KeyGen>
+	void init(RandomIntGenerator originalIndexGenerator, KeyGen& keyGen) {
+		indexGenerator = originalIndexGenerator;
+		indexGenerator.min = 0;
+		std::set<Key> uniqueKeys;
+		int inserts = 0;
+		while (uniqueKeys.size() < indexGenerator.max) {
+			auto k = keyGen.next();
+			uniqueKeys.insert(k);
+			if (++inserts > 3 * indexGenerator.max) {
+				// StringGenerator cardinality is too low, unable to find enough unique keys.
+				ASSERT(false);
+			}
+		}
+		// Adjust indexGenerator max down by 1 because indices are 0-based.
+		// Skipped for an empty set so that the unsigned max does not wrap around.
+		if (indexGenerator.max > 0) {
+			--indexGenerator.max;
+		}
+
+		keys.reserve(uniqueKeys.size());
+		for (auto& k : uniqueKeys) {
+			keys.push_back(KeyRef(arena, k));
+		}
+		iVal = 0;
+	}
+
+	// Return the key most recently selected by next()
+	Key last() const { return Key(keys[iVal], arena); }
+	// Return the range most recently returned by nextRange()
+	KeyRange lastRange() const { return rangeVal; }
+
+	// Select a key at a random index drawn from the index generator
+	Key next() {
+		iVal = indexGenerator.next();
+		return last();
+	}
+
+	// Next sequential with some jump distance (which may be negative) and optional wrap-around, which
+	// defaults to false. Without wrap-around the index is clamped to the first or last key.
+	Key next(int distance, bool wrap = false) {
+		const int count = static_cast<int>(keys.size());
+		iVal += distance;
+		if (wrap) {
+			// Use signed arithmetic: taking % against the unsigned size() would first convert a negative
+			// index to a huge unsigned value and wrap to the wrong key.
+			iVal %= count;
+			if (iVal < 0) {
+				iVal += count;
+			}
+		} else {
+			iVal = std::clamp(iVal, 0, count - 1);
+		}
+
+		return last();
+	}
+
+	// Returns a range that begins at a random key and ends `width` keys away, wrapping around the key
+	// set. The two ends are swapped if needed so that begin <= end.
+	KeyRange nextRange(int width) {
+		const int count = static_cast<int>(keys.size());
+		int begin = indexGenerator.next();
+		int end = (begin + width) % count;
+		if (end < 0) {
+			end += count;
+		}
+		if (begin > end) {
+			std::swap(begin, end);
+		}
+		rangeVal = KeyRange(KeyRangeRef(keys[begin], keys[end]), arena);
+		return rangeVal;
+	}
+
+	// Same as nextRange(width) with a width drawn from randomSkewedUInt32(0, keys.size())
+	KeyRange nextRange() { return nextRange(deterministicRandom()->randomSkewedUInt32(0, keys.size())); }
+};
+
+// A key set whose keys are pre-generated from a StringGenT, which must provide next() and toString() and
+// be constructible from a StringRef.
+// String Definition Format: indexRange::stringGenDefinition
+template <typename StringGenT>
+struct RandomStringSetGenerator : public RandomStringSetGeneratorBase {
+	RandomStringSetGenerator(RandomIntGenerator indexGen, StringGenT stringGen)
+	  : indexGen(indexGen), stringGen(stringGen) {
+		init(indexGen, stringGen);
+	}
+	RandomStringSetGenerator(const char* cstr) : RandomStringSetGenerator(std::string(cstr)) {}
+	RandomStringSetGenerator(std::string str) : RandomStringSetGenerator(StringRef(str)) {}
+	RandomStringSetGenerator(StringRef str) {
+		indexGen = str.eat("::");
+		stringGen = str;
+		init(indexGen, stringGen);
+	}
+
+	// The generators as originally given, kept for toString(). init() works on its own adjusted copy of
+	// the index generator.
+	RandomIntGenerator indexGen;
+	StringGenT stringGen;
+
+	std::string toString() const { return fmt::format("{}::{}", indexGen.toString(), stringGen.toString()); }
+};
+
+typedef RandomStringSetGenerator<RandomStringGenerator> RandomKeySetGenerator;
+
+// Generate random keys which are composed of tuple segments from a list of RandomKeySets
+// String Definition Format: RandomKeySet[,RandomKeySet]...
+struct RandomKeyTupleGenerator {
+	RandomKeyTupleGenerator(){};
+	RandomKeyTupleGenerator(std::vector<RandomKeySetGenerator> tupleParts) : tuples(std::move(tupleParts)) {}
+	RandomKeyTupleGenerator(std::string s) : RandomKeyTupleGenerator(StringRef(s)) {}
+	// Parses a comma separated list of RandomKeySet definitions
+	RandomKeyTupleGenerator(StringRef s) {
+		while (!s.empty()) {
+			tuples.push_back(s.eat(","));
+		}
+	}
+
+	// One key set per tuple part, in the order the parts are concatenated.
+	std::vector<RandomKeySetGenerator> tuples;
+	// Most recent key returned by next()
+	Key val;
+
+	// Draws a new key from every part and returns their concatenation.
+	Key next() {
+		// Advance every part first so the total size is known before allocating.
+		int totalBytes = 0;
+		for (auto& t : tuples) {
+			totalBytes += t.next().size();
+		}
+		val = makeString(totalBytes);
+
+		// Append the key each part just selected, fetching it only once per part.
+		uint8_t* out = mutateString(val);
+		for (auto& t : tuples) {
+			const Key part = t.last();
+			memcpy(out, part.begin(), part.size());
+			out += part.size();
+		}
+		return val;
+	}
+
+	// Return the last key returned by next()
+	Key last() const { return val; };
+
+	// Returns the comma separated definitions of all parts
+	std::string toString() const {
+		std::string s;
+		for (auto const& t : tuples) {
+			if (!s.empty()) {
+				s += ',';
+			}
+			s += t.toString();
+		}
+		return s;
+	}
+};
+
+typedef RandomStringSetGenerator<RandomKeyTupleGenerator> RandomKeyTupleSetGenerator;
+
+// Pairs a key generator with a value generator.
+struct RandomMutationGenerator {
+	RandomKeyTupleSetGenerator keys;
+	RandomValueGenerator valueGen;
+};
diff --git a/fdbserver/workloads/UnitTests.actor.cpp b/fdbserver/workloads/UnitTests.actor.cpp
index 0a2c92425b..3a82424e11 100644
--- a/fdbserver/workloads/UnitTests.actor.cpp
+++ b/fdbserver/workloads/UnitTests.actor.cpp
@@ -50,6 +50,7 @@ void forceLinkActorCollectionTests();
 void forceLinkDDSketchTests();
 void forceLinkCommitProxyTests();
 void forceLinkWipedStringTests();
+void forceLinkRandomKeyValueUtilsTests();
 
 struct UnitTestWorkload : TestWorkload {
 	static constexpr auto NAME = "UnitTests";
@@ -115,6 +116,7 @@ struct UnitTestWorkload : TestWorkload {
 		forceLinkActorCollectionTests();
 		forceLinkDDSketchTests();
 		forceLinkWipedStringTests();
+		forceLinkRandomKeyValueUtilsTests();
 	}
 
 	Future<Void> setup(Database const& cx) override {
From d0cb599c7ab32cb6d54398e57d4eb9a6e40e2973 Mon Sep 17 00:00:00 2001
From: Jay Zhuang
Date: Wed, 3 May 2023 16:33:04 -0700
Subject: [PATCH 2/2] Fix a gcc build error

```
RandomKeyValueUtils.cpp:64:106: error: call of overloaded 'RandomKeyTupleGenerator(<brace-enclosed initializer list>)' is ambiguous
```
---
 fdbclient/RandomKeyValueUtils.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/fdbclient/RandomKeyValueUtils.cpp b/fdbclient/RandomKeyValueUtils.cpp
index 410bee174a..22772638ca 100644
--- a/fdbclient/RandomKeyValueUtils.cpp
+++ b/fdbclient/RandomKeyValueUtils.cpp
@@ -53,16 +53,15 @@ TEST_CASE("/randomKeyValueUtils/generate") {
 	// Index generator will use a min of 0 so this is the same as 0:5
 	printNextN(RandomKeySetGenerator("5", "3..10/d..g"), 20);
 
-	printNextN(RandomKeyTupleSetGenerator(
-	               RandomIntGenerator(10),
-	               RandomKeyTupleGenerator(
-	                   { RandomKeySetGenerator(
-	                         RandomIntGenerator(5),
-	                         RandomStringGenerator(RandomIntGenerator(5), RandomIntGenerator('a', 'c', false))),
-	                     RandomKeySetGenerator(RandomIntGenerator(5),
-	                                           RandomStringGenerator(RandomIntGenerator(3, 10, true),
-	                                                                 RandomIntGenerator('d', 'f', false))) })),
-	           10);
+	std::vector<RandomKeySetGenerator> tupleParts{
+		RandomKeySetGenerator(RandomIntGenerator(5),
+		                      RandomStringGenerator(RandomIntGenerator(5), RandomIntGenerator('a', 'c', false))),
+		RandomKeySetGenerator(
+		    RandomIntGenerator(5),
+		    RandomStringGenerator(RandomIntGenerator(3, 10, true), RandomIntGenerator('d', 'f', false)))
+	};
+
+	printNextN(RandomKeyTupleSetGenerator(RandomIntGenerator(10), RandomKeyTupleGenerator(tupleParts)), 10);
 
 	// Same as above in string form
 	printNextN(RandomKeyTupleSetGenerator("10::5::5/a..c,5::^3..10/d..f"), 10);