update to latest version
This commit is contained in:
commit
66811b7bd2
|
@ -23,6 +23,6 @@
|
|||
FDBLibTLS_BUILD_SOURCES +=
|
||||
|
||||
|
||||
FDBLibTLS_CFLAGS := -fPIC -I/usr/local/include -I$(BOOSTDIR) -I. -DUSE_UCONTEXT
|
||||
FDBLibTLS_CFLAGS := -fPIC -I/usr/local/include -isystem$(BOOSTDIR) -I. -DUSE_UCONTEXT
|
||||
|
||||
lib/libFDBLibTLS.a: bin/coverage.FDBLibTLS.xml
|
||||
|
|
|
@ -41,6 +41,7 @@ package directory
|
|||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/apple/foundationdb/bindings/go/src/fdb"
|
||||
"github.com/apple/foundationdb/bindings/go/src/fdb/subspace"
|
||||
)
|
||||
|
@ -54,6 +55,18 @@ const (
|
|||
_MICROVERSION int32 = 0
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrDirAlreadyExists is returned when trying to create a directory while it already exists.
|
||||
ErrDirAlreadyExists = errors.New("the directory already exists")
|
||||
|
||||
// ErrDirNotExists is returned when opening or listing a directory that does not exist.
|
||||
ErrDirNotExists = errors.New("the directory does not exist")
|
||||
|
||||
// ErrParentDirDoesNotExist is returned when opening a directory and one or more
|
||||
// parent directories in the path do not exist.
|
||||
ErrParentDirDoesNotExist = errors.New("the parent directory does not exist")
|
||||
)
|
||||
|
||||
// Directory represents a subspace of keys in a FoundationDB database,
|
||||
// identified by a hierarchical path.
|
||||
type Directory interface {
|
||||
|
@ -69,8 +82,9 @@ type Directory interface {
|
|||
CreateOrOpen(t fdb.Transactor, path []string, layer []byte) (DirectorySubspace, error)
|
||||
|
||||
// Open opens the directory specified by path (relative to this Directory),
|
||||
// and returns the directory and its contents as a DirectorySubspace (or an
|
||||
// error if the directory does not exist).
|
||||
// and returns the directory and its contents as a DirectorySubspace (or ErrDirNotExists
|
||||
// error if the directory does not exist, or ErrParentDirDoesNotExist if one of the parent
|
||||
// directories in the path does not exist).
|
||||
//
|
||||
// If the byte slice layer is specified, it is compared against the layer
|
||||
// specified when the directory was created, and an error is returned if
|
||||
|
@ -79,7 +93,7 @@ type Directory interface {
|
|||
|
||||
// Create creates a directory specified by path (relative to this
|
||||
// Directory), and returns the directory and its contents as a
|
||||
// DirectorySubspace (or an error if the directory already exists).
|
||||
// DirectorySubspace (or ErrDirAlreadyExists if the directory already exists).
|
||||
//
|
||||
// If the byte slice layer is specified, it is recorded as the layer and
|
||||
// will be checked when opening the directory in the future.
|
||||
|
|
|
@ -99,7 +99,7 @@ func (dl directoryLayer) createOrOpen(rtr fdb.ReadTransaction, tr *fdb.Transacti
|
|||
}
|
||||
|
||||
if !allowOpen {
|
||||
return nil, errors.New("the directory already exists")
|
||||
return nil, ErrDirAlreadyExists
|
||||
}
|
||||
|
||||
if layer != nil {
|
||||
|
@ -112,7 +112,7 @@ func (dl directoryLayer) createOrOpen(rtr fdb.ReadTransaction, tr *fdb.Transacti
|
|||
}
|
||||
|
||||
if !allowCreate {
|
||||
return nil, errors.New("the directory does not exist")
|
||||
return nil, ErrDirNotExists
|
||||
}
|
||||
|
||||
if e := dl.checkVersion(rtr, tr); e != nil {
|
||||
|
@ -161,7 +161,7 @@ func (dl directoryLayer) createOrOpen(rtr fdb.ReadTransaction, tr *fdb.Transacti
|
|||
}
|
||||
|
||||
if parentNode == nil {
|
||||
return nil, errors.New("the parent directory does not exist")
|
||||
return nil, ErrParentDirDoesNotExist
|
||||
}
|
||||
|
||||
node := dl.nodeWithPrefix(prefix)
|
||||
|
@ -254,7 +254,7 @@ func (dl directoryLayer) List(rt fdb.ReadTransactor, path []string) ([]string, e
|
|||
|
||||
node := dl.find(rtr, path).prefetchMetadata(rtr)
|
||||
if !node.exists() {
|
||||
return nil, errors.New("the directory does not exist")
|
||||
return nil, ErrDirNotExists
|
||||
}
|
||||
|
||||
if node.isInPartition(nil, true) {
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
## FDB Backup Data Format
|
||||
|
||||
### Introduction
|
||||
This document describes the data format of the files generated by FoundationDB (FDB) backup procedure.
|
||||
The target readers who may benefit from reading this document are:
|
||||
* who make changes on the current backup or restore procedure;
|
||||
* who writes tools to digest the backup data for analytical purpose;
|
||||
* who wants to understand the internals of how backup and restore works.
|
||||
|
||||
The description of the backup data format is based on FDB 5.2 to FDB 6.1. The backup data format may (although unlikely) change after FDB 6.1.
|
||||
|
||||
|
||||
### Files generated by backup
|
||||
The backup procedure generates two types of files: range files and log files.
|
||||
* A range file describes key-value pairs in a range at the version when the backup process takes a snapshot of the range. Different range files have data for different ranges at different versions.
|
||||
* A log file describes the mutations taken from a version v<sub>1</sub> to v<sub>2</sub> during the backup procedure.
|
||||
|
||||
With the key-value pairs in range file and the mutations in log file, the restore procedure can restore the database into a consistent state at a user-provided version v<sub>k</sub> if the backup data is claimed by the restore as restorable at v<sub>k</sub>. (The details of determining if a set of backup data is restorable at a version are out of scope of this document and can be found at [backup.md](https://github.com/xumengpanda/foundationdb/blob/cd873831ecd18653c5bf459d6f72d14a99b619c4/design/backup.md).)
|
||||
|
||||
|
||||
### Filename conventions
|
||||
The backup files will be saved in a directory (i.e., url) specified by users. Under the directory, the range files are in the `snapshots` folder. The log files are in the `logs` folder.
|
||||
|
||||
The convention of the range filename is `snapshots/snapshot,beginVersion,beginVersion,blockSize`, where `beginVersion` is the version when the key-values in the range file are recorded, and blockSize is the size of data blocks in the range file.
|
||||
|
||||
The convention of the log filename is `logs/,versionPrefix/log,beginVersion,endVersion,randomUID, blockSize`, where the versionPrefix is a 2-level path (`x/y`) where beginVersion should go such that `x/y/*` contains (10^smallestBucket) possible versions; the randomUID is a random UID, the `beginVersion` and `endVersion` are the version range (left inclusive, right exclusive) when the mutations are recorded; and the `blockSize` is the data block size in the log file.
|
||||
|
||||
We will use an example to explain what each field in the range and log filename means.
|
||||
Suppose under the backup directory, we have a range file `snapshots/snapshot,78994177,78994177,97` and a log file `logs/0000/0000/log,78655645,98655645,149a0bdfedecafa2f648219d5eba816e,1048576`.
|
||||
The range file’s filename tells us that all key-value pairs decoded from the file are the key-value pairs in the database at version `78994177`. The data block size is `97` bytes.
|
||||
The log file’s filename tells us that the mutations in the log file were the mutations in the DB during the version range `[78655645,98655645)`, and the data block size is `1048576` bytes.
|
||||
|
||||
|
||||
### Data format in a range file
|
||||
A range file can have one to many data blocks. Each data block has a set of key-value pairs.
|
||||
A data block is encoded as follows: `Header startKey k1v1 k2v2 Padding`.
|
||||
|
||||
|
||||
Example:
|
||||
|
||||
The client code writes keys in this sequence:
|
||||
a c d e f g h i j z
|
||||
The backup procedure records the key-value pairs in the database into range file.
|
||||
|
||||
H = header P = padding a...z = keys v = value | = block boundary
|
||||
|
||||
Encoded file: H a cv dv ev P | H e ev fv gv hv P | H h hv iv jv z
|
||||
Decoded in blocks yields:
|
||||
Block 1: range [a, e) with kv pairs cv, dv
|
||||
Block 2: range [e, h) with kv pairs ev, fv, gv
|
||||
Block 3: range [h, z) with kv pairs hv, iv, jv
|
||||
|
||||
NOTE: All blocks except for the final block will have one last value which will not be used. This isn't actually a waste since if the next KV pair wouldn't fit within the block after the value then the space after the final key to the next 1MB boundary would just be padding anyway.
|
||||
|
||||
The code related to how a range file is written is in the `struct RangeFileWriter` in `namespace fileBackup`.
|
||||
|
||||
The code that decodes a range block is in `ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<IAsyncFile> file, int64_t offset, int len)`.
|
||||
|
||||
|
||||
### Data format in a log file
|
||||
A log file can have one to many data blocks.
|
||||
Each block is encoded as `Header, [Param1, Param2]... padding`.
|
||||
The first 32 bits in `Param1` and `Param2` specify the lengths of `Param1` and `Param2`, respectively.
|
||||
`Param1` specifies the version when the mutations happened;
|
||||
`Param2` encodes the group of mutations happened at the version.
|
||||
|
||||
Note that if the group of mutations is bigger than the block size, the mutation group will be split across multiple data blocks.
|
||||
For example, we may get `[Param1, Param2_part0]`, `[Param1, Param2_part1]`. By concatenating the `Param2_part0` and `Param2_part1`, we can get the group of all mutations happened in the version specified in `Param1`.
|
||||
|
||||
The encoding format for `Param1` is as follows:
|
||||
`hashValue|commitVersion|part`,
|
||||
where `hashValue` is the hash of the commitVersion, `commitVersion` is the version when the mutations in `Param2`(s) are taken, and `part` is the part number in case we need to concatenate the `Param2` to get the group of all mutations.
|
||||
`hashValue` takes 8 bits, `commitVersion` takes 64 bits, and `part` takes 32 bits.
|
||||
|
||||
Note that in case of concatenating the partial group of mutations in `Param2` to get the full group of all mutations, the part number should be continuous.
|
||||
|
||||
The encoding format for the group of mutations, which is Param2 or the concatenated Param2 in case of partial group of mutations in a block, is as follows:
|
||||
`length_of_the_mutation_group | encoded_mutation_1 | … | encoded_mutation_k`.
|
||||
The `encoded_mutation_i` is encoded as follows
|
||||
`type|kLen|vLen|Key|Value`
|
||||
where type is the mutation type, such as Set or Clear, `kLen` and `vLen` respectively are the length of the key and value in the mutation. `Key` and `Value` are the serialized value of the Key and Value in the mutation.
|
||||
|
||||
The code related to how a log file is written is in the `struct LogFileWriter` in `namespace fileBackup`.
|
||||
|
||||
The code that decodes a mutation block is in `ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeLogFileBlock(Reference<IAsyncFile> file, int64_t offset, int len)`.
|
||||
|
||||
|
||||
### Endianness
|
||||
When the restore decodes a serialized integer from the backup file, it needs to convert the serialized value from big endian to little endian.
|
||||
|
||||
The reason is as follows: When the backup procedure transfers the data to remote blob store, the backup data is encoded in big endian. However, FoundationDB currently only runs on little endian machines. The endianness affects the interpretation of an integer, so we must perform the endianness conversion.
|
|
@ -530,7 +530,7 @@ The second feature is the ability to add one or more synchronous replicas of the
|
|||
|
||||
An example configuration would be four total datacenters, two on the east coast, two on the west coast, with a preference for fast write latencies from the west coast. One datacenter on each coast would be sized to store a full copy of the data. The second datacenter on each coast would only have a few FoundationDB processes.
|
||||
|
||||
While everything is healthy, writes need to be made durable in both west coast datacenters before a commit can succeed. The geographic proximity of the two datacenters minimizes the additional commit latency. Reads can be served from either region, and clients can get data from whichever region is closer. Getting a read version from the each coast region will still require communicating with a west coast datacenter. Clients can cache read versions if they can tolerate reading stale data to avoid waiting on read versions.
|
||||
While everything is healthy, writes need to be made durable in both west coast datacenters before a commit can succeed. The geographic proximity of the two datacenters minimizes the additional commit latency. Reads can be served from either region, and clients can get data from whichever region is closer. Getting a read version from the east coast region will still require communicating with a west coast datacenter. Clients can cache read versions if they can tolerate reading stale data to avoid waiting on read versions.
|
||||
|
||||
If either west coast datacenter fails, the last few mutations will be propagated from the remaining west coast datacenter to the east coast. At this point, FoundationDB will start accepting commits on the east coast. Once the west coast comes back online, the system will automatically start copying all the data that was committed to the east coast back to the west coast replica. Once the west coast has caught up, the system will automatically switch back to accepting writes from the west coast again.
|
||||
|
||||
|
@ -615,7 +615,7 @@ The number of replicas in each region is controlled by redundancy level. For exa
|
|||
Asymmetric configurations
|
||||
-------------------------
|
||||
|
||||
The fact that satellite policies are configured per region allows for asymmetric configurations. For example, FoudnationDB can have a three datacenter setup where there are two datacenters on the west coast (WC1, WC2) and one datacenter on the east coast (EC1). The west coast region can be set as the preferred active region by setting the priority of its primary datacenter higher than the east coast datacenter. The west coast region should have a satellite policy configured, so that when it is active, FoundationDB is making mutations durable in both west coast datacenters. In the rare event that one of the west coast datacenter have failed, FoundationDB will fail over to the east coast datacenter. Because this region does not a satellite datacenter, the mutations will only be made durable in one datacenter while the transaction subsystem is located here. However this is justifiable because the region will only be active if a datacenter has already been lost.
|
||||
The fact that satellite policies are configured per region allows for asymmetric configurations. For example, FoundationDB can have a three datacenter setup where there are two datacenters on the west coast (WC1, WC2) and one datacenter on the east coast (EC1). The west coast region can be set as the preferred active region by setting the priority of its primary datacenter higher than the east coast datacenter. The west coast region should have a satellite policy configured, so that when it is active, FoundationDB is making mutations durable in both west coast datacenters. In the rare event that one of the west coast datacenters has failed, FoundationDB will fail over to the east coast datacenter. Because this region does not have a satellite datacenter, the mutations will only be made durable in one datacenter while the transaction subsystem is located here. However, this is justifiable because the region will only be active if a datacenter has already been lost.
|
||||
|
||||
This is the region configuration that implements the example::
|
||||
|
||||
|
@ -669,7 +669,7 @@ To configure an existing database to regions, do the following steps:
|
|||
|
||||
4. Configure ``usable_regions=2``. This will cause the cluster to start copying data between the regions.
|
||||
|
||||
5. Watch ``status`` and wait until data movement is complete. This will mean signal that the remote datacenter has a full replica of all of the data in the database.
|
||||
5. Watch ``status`` and wait until data movement is complete. This will signal that the remote datacenter has a full replica of all of the data in the database.
|
||||
|
||||
6. Change the region configuration to have a non-negative priority for the primary datacenters in both regions. This will enable automatic failover between regions.
|
||||
|
||||
|
@ -680,7 +680,7 @@ When a primary datacenter fails, the cluster will go into a degraded state. It w
|
|||
|
||||
.. warning:: While a datacenter has failed, the maximum write throughput of the cluster will be roughly 1/3 of normal performance. This is because the transaction logs need to store all of the mutations being committed, so that once the other datacenter comes back online, it can replay history to catch back up.
|
||||
|
||||
To drop the dead datacenter do the follow steps:
|
||||
To drop the dead datacenter do the following steps:
|
||||
|
||||
1. Configure the region configuration so that the dead datacenter has a negative priority.
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ Promises and futures can be used within a single process, but their real strengt
|
|||
wait()
|
||||
------
|
||||
|
||||
At the point when a receiver holding a ``Future<T>`` needs the ``T`` to continue computation, it invokes the ``wait()`` statement with the ``Future<T>`` as its parameter. The ``wait()`` statement allows the calling actor to pause execution until the value of the future is set, returning a value of type ``T`` During the wait, other actors can continue execution, providing asynchronous concurrency within a single process.
|
||||
At the point when a receiver holding a ``Future<T>`` needs the ``T`` to continue computation, it invokes the ``wait()`` statement with the ``Future<T>`` as its parameter. The ``wait()`` statement allows the calling actor to pause execution until the value of the future is set, returning a value of type ``T``. During the wait, other actors can continue execution, providing asynchronous concurrency within a single process.
|
||||
|
||||
ACTOR
|
||||
-----
|
||||
|
@ -154,5 +154,5 @@ Some preprocessor definitions will not fix all issues though. When programming f
|
|||
foo([x]() { x->bar(); })
|
||||
}
|
||||
|
||||
- state variables in don't follow the normal scoping rules. So in flow a state variable can be defined in a inner scope and later it can be used in the outer scope. In order to not break compilation in IDE-mode, always define state variables in the outermost scope they will be used.
|
||||
- state variables in flow don't follow the normal scoping rules. So in flow a state variable can be defined in a inner scope and later it can be used in the outer scope. In order to not break compilation in IDE-mode, always define state variables in the outermost scope they will be used.
|
||||
|
||||
|
|
|
@ -470,6 +470,10 @@ void initHelp() {
|
|||
"include all|<ADDRESS>*",
|
||||
"permit previously-excluded servers to rejoin the database",
|
||||
"If `all' is specified, the excluded servers list is cleared.\n\nFor each IP address or IP:port pair in <ADDRESS>*, removes any matching exclusions from the excluded servers list. (A specified IP will match all IP:* exclusion entries)");
|
||||
helpMap["snapshot"] = CommandHelp(
|
||||
"snapshot <BINARY-PATH>:<ARG1=VAL1>,<ARG2=VAL2>,...",
|
||||
"snapshot the database",
|
||||
"invokes binary provided in binary-path with the arg,value pairs on TLog, Storage and Coordinators nodes. UID is a reserved ARG key.");
|
||||
helpMap["setclass"] = CommandHelp(
|
||||
"setclass <ADDRESS> <unset|storage|transaction|default>",
|
||||
"change the class of a process",
|
||||
|
@ -2121,6 +2125,11 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
|
|||
return false;
|
||||
}
|
||||
|
||||
ACTOR Future<bool> createSnapshot(Database db, StringRef snapCmd) {
|
||||
wait(makeInterruptable(mgmtSnapCreate(db, snapCmd)));
|
||||
return false;
|
||||
}
|
||||
|
||||
ACTOR Future<bool> setClass( Database db, std::vector<StringRef> tokens ) {
|
||||
if( tokens.size() == 1 ) {
|
||||
vector<ProcessData> _workers = wait( makeInterruptable(getWorkers(db)) );
|
||||
|
@ -2720,6 +2729,17 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "snapshot")) {
|
||||
if (tokens.size() != 2) {
|
||||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
bool err = wait(createSnapshot(db, tokens[1]));
|
||||
if (err) is_error = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "setclass")) {
|
||||
if (tokens.size() != 3 && tokens.size() != 1) {
|
||||
printUsage(tokens[0]);
|
||||
|
|
|
@ -44,7 +44,8 @@ static const char* typeString[] = { "SetValue",
|
|||
"ByteMax",
|
||||
"MinV2",
|
||||
"AndV2",
|
||||
"CompareAndClear" };
|
||||
"CompareAndClear",
|
||||
"Exec" };
|
||||
|
||||
struct MutationRef {
|
||||
static const int OVERHEAD_BYTES = 12; //12 is the size of Header in MutationList entries
|
||||
|
@ -70,6 +71,9 @@ struct MutationRef {
|
|||
MinV2,
|
||||
AndV2,
|
||||
CompareAndClear,
|
||||
// ExecOp is always set with FIRST_IN_BATCH option to quickly identify
|
||||
// the op in a transaction batch while parsing it in TLog
|
||||
Exec,
|
||||
MAX_ATOMIC_OP
|
||||
};
|
||||
// This is stored this way for serialization purposes.
|
||||
|
|
|
@ -148,6 +148,7 @@ public:
|
|||
int64_t transactionsMaybeCommitted;
|
||||
int64_t transactionsResourceConstrained;
|
||||
int64_t transactionsProcessBehind;
|
||||
int64_t transactionWaitsForFullRecovery;
|
||||
ContinuousSample<double> latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit;
|
||||
|
||||
int outstandingWatches;
|
||||
|
|
|
@ -1474,6 +1474,29 @@ ACTOR Future<Void> waitForExcludedServers( Database cx, vector<AddressExclusion>
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> mgmtSnapCreate(Database cx, StringRef snapCmd) {
|
||||
state int retryCount = 0;
|
||||
|
||||
loop {
|
||||
state UID snapUID = deterministicRandom()->randomUniqueID();
|
||||
try {
|
||||
wait(snapCreate(cx, snapCmd, snapUID));
|
||||
printf("Snapshots tagged with UID: %s, check logs for status\n", snapUID.toString().c_str());
|
||||
TraceEvent("SnapCreateSucceeded").detail("snapUID", snapUID);
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
++retryCount;
|
||||
TraceEvent(retryCount > 3 ? SevWarn : SevInfo, "SnapCreateFailed").error(e);
|
||||
if (retryCount > 3) {
|
||||
fprintf(stderr, "Snapshot create failed, %d (%s)."
|
||||
" Please cleanup any instance level snapshots created.\n", e.code(), e.what());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> waitForFullReplication( Database cx ) {
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||
loop {
|
||||
|
|
|
@ -191,5 +191,9 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators( Database cx );
|
|||
void schemaCoverage( std::string const& spath, bool covered=true );
|
||||
bool schemaMatch( json_spirit::mValue const& schema, json_spirit::mValue const& result, std::string& errorStr, Severity sev=SevError, bool checkCoverage=false, std::string path = std::string(), std::string schema_path = std::string() );
|
||||
|
||||
// execute payload in 'snapCmd' on all the coordinators, TLogs and
|
||||
// storage nodes
|
||||
ACTOR Future<Void> mgmtSnapCreate(Database cx, StringRef snapCmd);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
||||
|
|
|
@ -49,6 +49,7 @@ struct MasterProxyInterface {
|
|||
|
||||
RequestStream< struct GetRawCommittedVersionRequest > getRawCommittedVersion;
|
||||
RequestStream< struct TxnStateRequest > txnState;
|
||||
RequestStream<struct ExecRequest> execReq;
|
||||
|
||||
RequestStream< struct GetHealthMetricsRequest > getHealthMetrics;
|
||||
|
||||
|
@ -62,7 +63,7 @@ struct MasterProxyInterface {
|
|||
void serialize(Archive& ar) {
|
||||
serializer(ar, locality, provisional, commit, getConsistentReadVersion, getKeyServersLocations,
|
||||
waitFailure, getStorageServerRejoinInfo, getRawCommittedVersion,
|
||||
txnState, getHealthMetrics);
|
||||
txnState, getHealthMetrics, execReq);
|
||||
}
|
||||
|
||||
void initEndpoints() {
|
||||
|
@ -298,4 +299,21 @@ struct GetHealthMetricsRequest
|
|||
}
|
||||
};
|
||||
|
||||
struct ExecRequest
|
||||
{
|
||||
constexpr static FileIdentifier file_identifier = 22403900;
|
||||
Arena arena;
|
||||
StringRef execPayload;
|
||||
ReplyPromise<Void> reply;
|
||||
Optional<UID> debugID;
|
||||
|
||||
explicit ExecRequest(Optional<UID> const& debugID = Optional<UID>()) : debugID(debugID) {}
|
||||
explicit ExecRequest(StringRef exec, Optional<UID> debugID = Optional<UID>()) : execPayload(exec), debugID(debugID) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, execPayload, reply, arena, debugID);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1964,7 +1964,7 @@ Future<Standalone<RangeResultRef>> getRange( Database const& cx, Future<Version>
|
|||
}
|
||||
|
||||
Transaction::Transaction( Database const& cx )
|
||||
: cx(cx), info(cx->taskID), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF), committedVersion(invalidVersion), versionstampPromise(Promise<Standalone<StringRef>>()), options(cx), numErrors(0), trLogInfo(createTrLogInfoProbabilistically(cx))
|
||||
: cx(cx), info(cx->taskID), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF), committedVersion(invalidVersion), versionstampPromise(Promise<Standalone<StringRef>>()), options(cx), numErrors(0), numRetries(0), trLogInfo(createTrLogInfoProbabilistically(cx))
|
||||
{
|
||||
setPriority(GetReadVersionRequest::PRIORITY_DEFAULT);
|
||||
}
|
||||
|
@ -1987,6 +1987,7 @@ void Transaction::operator=(Transaction&& r) BOOST_NOEXCEPT {
|
|||
info = r.info;
|
||||
backoff = r.backoff;
|
||||
numErrors = r.numErrors;
|
||||
numRetries = r.numRetries;
|
||||
committedVersion = r.committedVersion;
|
||||
versionstampPromise = std::move(r.versionstampPromise);
|
||||
watches = r.watches;
|
||||
|
@ -2287,6 +2288,45 @@ void Transaction::atomicOp(const KeyRef& key, const ValueRef& operand, MutationR
|
|||
TEST(true); //NativeAPI atomic operation
|
||||
}
|
||||
|
||||
ACTOR Future<Void> executeCoordinators(DatabaseContext* cx, StringRef execPayload, Optional<UID> debugID) {
|
||||
try {
|
||||
if (debugID.present()) {
|
||||
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.executeCoordinators.Before");
|
||||
}
|
||||
|
||||
state ExecRequest req(execPayload, debugID);
|
||||
if (debugID.present()) {
|
||||
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(),
|
||||
"NativeAPI.executeCoordinators.Inside loop");
|
||||
}
|
||||
wait(loadBalance(cx->getMasterProxies(false), &MasterProxyInterface::execReq, req, cx->taskID));
|
||||
if (debugID.present())
|
||||
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(),
|
||||
"NativeAPI.executeCoordinators.After");
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
TraceEvent("NativeAPI.executeCoordinatorsError").error(e);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void Transaction::execute(const KeyRef& cmdType, const ValueRef& cmdPayload) {
|
||||
TraceEvent("Execute operation").detail("Key", cmdType.toString()).detail("Value", cmdPayload.toString());
|
||||
|
||||
if (cmdType.size() > CLIENT_KNOBS->KEY_SIZE_LIMIT) throw key_too_large();
|
||||
if (cmdPayload.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) throw value_too_large();
|
||||
|
||||
auto& req = tr;
|
||||
|
||||
// Helps with quickly finding the exec op in a tlog batch
|
||||
setOption(FDBTransactionOptions::FIRST_IN_BATCH);
|
||||
|
||||
auto& t = req.transaction;
|
||||
auto r = singleKeyRange(cmdType, req.arena);
|
||||
auto v = ValueRef(req.arena, cmdPayload);
|
||||
t.mutations.push_back(req.arena, MutationRef(MutationRef::Exec, r.begin, v));
|
||||
}
|
||||
|
||||
void Transaction::clear( const KeyRangeRef& range, bool addConflictRange ) {
|
||||
auto &req = tr;
|
||||
auto &t = req.transaction;
|
||||
|
@ -2364,6 +2404,10 @@ TransactionOptions::TransactionOptions(Database const& cx) {
|
|||
if (BUGGIFY) {
|
||||
commitOnFirstProxy = true;
|
||||
}
|
||||
maxRetries = cx->transactionMaxRetries;
|
||||
if (maxRetries == -1) {
|
||||
maxRetries = 10;
|
||||
}
|
||||
}
|
||||
|
||||
TransactionOptions::TransactionOptions() {
|
||||
|
@ -2373,11 +2417,19 @@ TransactionOptions::TransactionOptions() {
|
|||
|
||||
void TransactionOptions::reset(Database const& cx) {
|
||||
double oldMaxBackoff = maxBackoff;
|
||||
double oldMaxRetries = maxRetries;
|
||||
memset(this, 0, sizeof(*this));
|
||||
maxBackoff = cx->apiVersionAtLeast(610) ? oldMaxBackoff : cx->transactionMaxBackoff;
|
||||
maxRetries = oldMaxRetries;
|
||||
lockAware = cx->lockAware;
|
||||
}
|
||||
|
||||
void Transaction::onErrorReset() {
|
||||
int32_t oldNumRetires = numRetries;
|
||||
reset();
|
||||
numRetries = oldNumRetires;
|
||||
}
|
||||
|
||||
void Transaction::reset() {
|
||||
tr = CommitTransactionRequest();
|
||||
readVersion = Future<Version>();
|
||||
|
@ -2654,7 +2706,13 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
|
|||
// The user needs to be informed that we aren't sure whether the commit happened. Standard retry loops retry it anyway (relying on transaction idempotence) but a client might do something else.
|
||||
throw commit_unknown_result();
|
||||
} else {
|
||||
if (e.code() != error_code_transaction_too_old && e.code() != error_code_not_committed && e.code() != error_code_database_locked && e.code() != error_code_proxy_memory_limit_exceeded)
|
||||
if (e.code() != error_code_transaction_too_old
|
||||
&& e.code() != error_code_not_committed
|
||||
&& e.code() != error_code_database_locked
|
||||
&& e.code() != error_code_proxy_memory_limit_exceeded
|
||||
&& e.code() != error_code_transaction_not_permitted
|
||||
&& e.code() != error_code_cluster_not_fully_recovered
|
||||
&& e.code() != error_code_txn_exec_log_anti_quorum)
|
||||
TraceEvent(SevError, "TryCommitError").error(e);
|
||||
if (trLogInfo)
|
||||
trLogInfo->addLog(FdbClientLogEvents::EventCommitError(startTime, static_cast<int>(e.code()), req));
|
||||
|
@ -2765,6 +2823,7 @@ ACTOR Future<Void> commitAndWatch(Transaction *self) {
|
|||
}
|
||||
|
||||
self->versionstampPromise.sendError(transaction_invalid_version());
|
||||
//self->onErrorReset();
|
||||
self->reset();
|
||||
}
|
||||
|
||||
|
@ -3024,6 +3083,9 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
|
|||
}
|
||||
|
||||
Future<Void> Transaction::onError( Error const& e ) {
|
||||
if (numRetries < std::numeric_limits<int>::max()) {
|
||||
numRetries++;
|
||||
}
|
||||
if (e.code() == error_code_success)
|
||||
{
|
||||
return client_invalid_operation();
|
||||
|
@ -3032,7 +3094,8 @@ Future<Void> Transaction::onError( Error const& e ) {
|
|||
e.code() == error_code_commit_unknown_result ||
|
||||
e.code() == error_code_database_locked ||
|
||||
e.code() == error_code_proxy_memory_limit_exceeded ||
|
||||
e.code() == error_code_process_behind)
|
||||
e.code() == error_code_process_behind ||
|
||||
e.code() == error_code_cluster_not_fully_recovered)
|
||||
{
|
||||
if(e.code() == error_code_not_committed)
|
||||
cx->transactionsNotCommitted++;
|
||||
|
@ -3042,9 +3105,15 @@ Future<Void> Transaction::onError( Error const& e ) {
|
|||
cx->transactionsResourceConstrained++;
|
||||
if (e.code() == error_code_process_behind)
|
||||
cx->transactionsProcessBehind++;
|
||||
if (e.code() == error_code_cluster_not_fully_recovered) {
|
||||
cx->transactionWaitsForFullRecovery++;
|
||||
if (numRetries > options.maxRetries) {
|
||||
return e;
|
||||
}
|
||||
}
|
||||
|
||||
double backoff = getBackoff(e.code());
|
||||
reset();
|
||||
onErrorReset();
|
||||
return delay( backoff, info.taskID );
|
||||
}
|
||||
if (e.code() == error_code_transaction_too_old ||
|
||||
|
@ -3056,7 +3125,7 @@ Future<Void> Transaction::onError( Error const& e ) {
|
|||
cx->transactionsFutureVersions++;
|
||||
|
||||
double maxBackoff = options.maxBackoff;
|
||||
reset();
|
||||
onErrorReset();
|
||||
return delay( std::min(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, maxBackoff), info.taskID );
|
||||
}
|
||||
|
||||
|
@ -3260,3 +3329,102 @@ void enableClientInfoLogging() {
|
|||
networkOptions.logClientInfo = true;
|
||||
TraceEvent(SevInfo, "ClientInfoLoggingEnabled");
|
||||
}
|
||||
|
||||
ACTOR Future<Void> snapCreate(Database inputCx, StringRef snapCmd, UID snapUID) {
|
||||
state Transaction tr(inputCx);
|
||||
state DatabaseContext* cx = inputCx.getPtr();
|
||||
// remember the client ID before the snap operation
|
||||
state UID preSnapClientUID = cx->clientInfo->get().id;
|
||||
|
||||
TraceEvent("SnapCreateEnter")
|
||||
.detail("SnapCmd", snapCmd.toString())
|
||||
.detail("UID", snapUID)
|
||||
.detail("PreSnapClientUID", preSnapClientUID);
|
||||
|
||||
StringRef snapCmdArgs = snapCmd;
|
||||
StringRef snapCmdPart = snapCmdArgs.eat(":");
|
||||
state Standalone<StringRef> snapUIDRef(snapUID.toString());
|
||||
state Standalone<StringRef> snapPayloadRef = snapCmdPart
|
||||
.withSuffix(LiteralStringRef(":uid="))
|
||||
.withSuffix(snapUIDRef)
|
||||
.withSuffix(LiteralStringRef(","))
|
||||
.withSuffix(snapCmdArgs);
|
||||
state Standalone<StringRef>
|
||||
tLogCmdPayloadRef = LiteralStringRef("empty-binary:uid=").withSuffix(snapUIDRef);
|
||||
// disable popping of TLog
|
||||
tr.reset();
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr.execute(execDisableTLogPop, tLogCmdPayloadRef);
|
||||
wait(timeoutError(tr.commit(), 10));
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("DisableTLogPopFailed").error(e);
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("SnapCreateAfterLockingTLogs").detail("UID", snapUID);
|
||||
|
||||
// snap the storage and Tlogs
|
||||
// if we retry the below command in failure cases with the same snapUID
|
||||
// then the snapCreate can end up creating multiple snapshots with
|
||||
// the same name which needs additional handling, hence we fail in
|
||||
// failure cases and let the caller retry with different snapUID
|
||||
tr.reset();
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr.execute(execSnap, snapPayloadRef);
|
||||
wait(tr.commit());
|
||||
} catch (Error& e) {
|
||||
TraceEvent("SnapCreateErroSnapTLogStorage").error(e);
|
||||
throw;
|
||||
}
|
||||
|
||||
TraceEvent("SnapCreateAfterSnappingTLogStorage").detail("UID", snapUID);
|
||||
|
||||
if (BUGGIFY) {
|
||||
int32_t toDelay = deterministicRandom()->randomInt(1, 30);
|
||||
wait(delay(toDelay));
|
||||
}
|
||||
|
||||
// enable popping of the TLog
|
||||
tr.reset();
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr.execute(execEnableTLogPop, tLogCmdPayloadRef);
|
||||
wait(tr.commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent("EnableTLogPopFailed").error(e);
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("SnapCreateAfterUnlockingTLogs").detail("UID", snapUID);
|
||||
|
||||
// snap the coordinators
|
||||
try {
|
||||
Future<Void> exec = executeCoordinators(cx, snapPayloadRef, snapUID);
|
||||
wait(timeoutError(exec, 5.0));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("SnapCreateErrorSnapCoords").error(e);
|
||||
throw;
|
||||
}
|
||||
|
||||
TraceEvent("SnapCreateAfterSnappingCoords").detail("UID", snapUID);
|
||||
|
||||
// if the client IDs did not change then we have a clean snapshot
|
||||
UID postSnapClientUID = cx->clientInfo->get().id;
|
||||
if (preSnapClientUID != postSnapClientUID) {
|
||||
TraceEvent("UID mismatch")
|
||||
.detail("SnapPreSnapClientUID", preSnapClientUID)
|
||||
.detail("SnapPostSnapClientUID", postSnapClientUID);
|
||||
throw coordinators_changed();
|
||||
}
|
||||
|
||||
TraceEvent("SnapCreateComplete").detail("UID", snapUID);
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -141,6 +141,7 @@ struct StorageMetrics;
|
|||
|
||||
struct TransactionOptions {
|
||||
double maxBackoff;
|
||||
uint32_t maxRetries;
|
||||
uint32_t getReadVersionFlags;
|
||||
uint32_t customTransactionSizeLimit;
|
||||
bool checkWritesEnabled : 1;
|
||||
|
@ -259,6 +260,14 @@ public:
|
|||
// If checkWriteConflictRanges is true, existing write conflict ranges will be searched for this key
|
||||
void set( const KeyRef& key, const ValueRef& value, bool addConflictRange = true );
|
||||
void atomicOp( const KeyRef& key, const ValueRef& value, MutationRef::Type operationType, bool addConflictRange = true );
|
||||
// execute operation is similar to set, but the command will reach
|
||||
// one of the proxies, all the TLogs and all the storage nodes.
|
||||
// instead of setting a key and value on the DB, it executes the command
|
||||
// that is passed in the value field.
|
||||
// - cmdType can be used for logging purposes
|
||||
// - cmdPayload contains the details of the command to be executed:
|
||||
// format of the cmdPayload : <binary-path>:<arg1=val1>,<arg2=val2>...
|
||||
void execute(const KeyRef& cmdType, const ValueRef& cmdPayload);
|
||||
void clear( const KeyRangeRef& range, bool addConflictRange = true );
|
||||
void clear( const KeyRef& key, bool addConflictRange = true );
|
||||
Future<Void> commit(); // Throws not_committed or commit_unknown_result errors in normal operation
|
||||
|
@ -278,6 +287,7 @@ public:
|
|||
void operator=(Transaction&& r) BOOST_NOEXCEPT;
|
||||
|
||||
void reset();
|
||||
void onErrorReset();
|
||||
void fullReset();
|
||||
double getBackoff(int errCode);
|
||||
void debugTransaction(UID dID) { info.debugID = dID; }
|
||||
|
@ -288,6 +298,7 @@ public:
|
|||
|
||||
TransactionInfo info;
|
||||
int numErrors;
|
||||
int numRetries;
|
||||
|
||||
std::vector<Reference<Watch>> watches;
|
||||
|
||||
|
@ -324,5 +335,9 @@ std::string unprintable( const std::string& );
|
|||
|
||||
int64_t extractIntOption( Optional<StringRef> value, int64_t minValue = std::numeric_limits<int64_t>::min(), int64_t maxValue = std::numeric_limits<int64_t>::max() );
|
||||
|
||||
// Takes a snapshot of the cluster, specifically the following persistent
|
||||
// states: coordinator, TLog and storage state
|
||||
ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
||||
|
|
|
@ -36,6 +36,14 @@ const KeyRef keyServersEnd = keyServersKeys.end;
|
|||
const KeyRangeRef keyServersKeyServersKeys ( LiteralStringRef("\xff/keyServers/\xff/keyServers/"), LiteralStringRef("\xff/keyServers/\xff/keyServers0"));
|
||||
const KeyRef keyServersKeyServersKey = keyServersKeyServersKeys.begin;
|
||||
|
||||
// list of reserved exec commands
|
||||
const StringRef execSnap = LiteralStringRef("snap"); // snapshot persistent state of
|
||||
// storage, TLog and coordinated state
|
||||
const StringRef execDisableTLogPop = LiteralStringRef("\xff/TLogDisablePop"); // disable pop on TLog
|
||||
const StringRef execEnableTLogPop = LiteralStringRef("\xff/TLogEnablePop"); // enable pop on TLog
|
||||
// used to communicate snap failures between TLog and SnapTest Workload, used only in simulator
|
||||
const StringRef snapTestFailStatus = LiteralStringRef("\xff/SnapTestFailStatus/");
|
||||
|
||||
const Key keyServersKey( const KeyRef& k ) {
|
||||
return k.withPrefix( keyServersPrefix );
|
||||
}
|
||||
|
|
|
@ -282,6 +282,10 @@ extern const KeyRef healthyZoneKey;
|
|||
|
||||
const Value healthyZoneValue( StringRef const& zoneId, Version version );
|
||||
std::pair<Key,Version> decodeHealthyZoneValue( ValueRef const& );
|
||||
extern const StringRef execSnap;
|
||||
extern const StringRef execDisableTLogPop;
|
||||
extern const StringRef execEnableTLogPop;
|
||||
extern const StringRef snapTestFailStatus;
|
||||
|
||||
// All mutations done to this range are blindly copied into txnStateStore.
|
||||
// Used to create artifically large txnStateStore instances in testing.
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
fdbrpc_BUILD_SOURCES += fdbrpc/libeio/eio.c
|
||||
|
||||
fdbrpc_CFLAGS := -I$(BOOSTDIR) -I. -Ifdbrpc/libeio -DUSE_UCONTEXT
|
||||
fdbrpc_CFLAGS := -isystem$(BOOSTDIR) -I. -Ifdbrpc/libeio -DUSE_UCONTEXT
|
||||
fdbrpc_LDFLAGS :=
|
||||
|
||||
ifeq ($(PLATFORM),osx)
|
||||
|
|
|
@ -17,6 +17,8 @@ set(FDBSERVER_SRCS
|
|||
DBCoreState.h
|
||||
DiskQueue.actor.cpp
|
||||
fdbserver.actor.cpp
|
||||
FDBExecHelper.actor.cpp
|
||||
FDBExecHelper.actor.h
|
||||
IDiskQueue.h
|
||||
IKeyValueStore.h
|
||||
IPager.h
|
||||
|
@ -152,6 +154,7 @@ set(FDBSERVER_SRCS
|
|||
workloads/Serializability.actor.cpp
|
||||
workloads/Sideband.actor.cpp
|
||||
workloads/SlowTaskWorkload.actor.cpp
|
||||
workloads/SnapTest.actor.cpp
|
||||
workloads/StatusWorkload.actor.cpp
|
||||
workloads/Storefront.actor.cpp
|
||||
workloads/StreamingRead.actor.cpp
|
||||
|
|
|
@ -40,6 +40,9 @@ struct ConflictBatch {
|
|||
TransactionConflict = 0,
|
||||
TransactionTooOld,
|
||||
TransactionCommitted,
|
||||
TransactionNotPermitted,
|
||||
TransactionNotFullyRecovered,
|
||||
TransactionExecLogAntiQuorum,
|
||||
};
|
||||
|
||||
void addTransaction( const CommitTransactionRef& transaction );
|
||||
|
@ -62,4 +65,4 @@ private:
|
|||
void addConflictRanges(Version now, std::vector< std::pair<StringRef,StringRef> >::iterator begin, std::vector< std::pair<StringRef,StringRef> >::iterator end, class SkipList* part);
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
#define BOOST_SYSTEM_NO_LIB
|
||||
#define BOOST_DATE_TIME_NO_LIB
|
||||
#define BOOST_REGEX_NO_LIB
|
||||
#include <boost/process.hpp>
|
||||
#include "fdbserver/FDBExecHelper.actor.h"
|
||||
#include "flow/Trace.h"
|
||||
#include "flow/flow.h"
|
||||
#if defined(CMAKE_BUILD) || !defined(WIN32)
|
||||
#include "versions.h"
|
||||
#endif
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
ExecCmdValueString::ExecCmdValueString(StringRef pCmdValueString) {
|
||||
cmdValueString = pCmdValueString;
|
||||
parseCmdValue();
|
||||
}
|
||||
|
||||
void ExecCmdValueString::setCmdValueString(StringRef pCmdValueString) {
|
||||
// reset everything
|
||||
binaryPath = StringRef();
|
||||
keyValueMap.clear();
|
||||
|
||||
// set the new cmdValueString
|
||||
cmdValueString = pCmdValueString;
|
||||
|
||||
// parse it out
|
||||
parseCmdValue();
|
||||
}
|
||||
|
||||
StringRef ExecCmdValueString::getCmdValueString() {
|
||||
return cmdValueString.toString();
|
||||
}
|
||||
|
||||
StringRef ExecCmdValueString::getBinaryPath() {
|
||||
return binaryPath;
|
||||
}
|
||||
|
||||
VectorRef<StringRef> ExecCmdValueString::getBinaryArgs() {
|
||||
return binaryArgs;
|
||||
}
|
||||
|
||||
StringRef ExecCmdValueString::getBinaryArgValue(StringRef key) {
|
||||
StringRef res;
|
||||
if (keyValueMap.find(key) != keyValueMap.end()) {
|
||||
res = keyValueMap[key];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void ExecCmdValueString::parseCmdValue() {
|
||||
StringRef param = this->cmdValueString;
|
||||
// get the binary path
|
||||
this->binaryPath = param.eat(LiteralStringRef(":"));
|
||||
|
||||
// no arguments provided
|
||||
if (param == StringRef()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// extract the arguments
|
||||
while (param != StringRef()) {
|
||||
StringRef token = param.eat(LiteralStringRef(","));
|
||||
this->binaryArgs.push_back(this->binaryArgs.arena(), token);
|
||||
|
||||
StringRef key = token.eat(LiteralStringRef("="));
|
||||
keyValueMap.insert(std::make_pair(key, token));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void ExecCmdValueString::dbgPrint() {
|
||||
auto te = TraceEvent("ExecCmdValueString");
|
||||
|
||||
te.detail("CmdValueString", cmdValueString.toString());
|
||||
te.detail("BinaryPath", binaryPath.toString());
|
||||
|
||||
int i = 0;
|
||||
for (auto elem : binaryArgs) {
|
||||
te.detail(format("Arg", ++i).c_str(), elem.toString());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync)
|
||||
{
|
||||
state std::string argsString;
|
||||
for (auto const& elem : paramList) {
|
||||
argsString += elem + ",";
|
||||
}
|
||||
TraceEvent("SpawnProcess").detail("Cmd", binPath).detail("Args", argsString);
|
||||
|
||||
state int err = 0;
|
||||
state double runTime = 0;
|
||||
state boost::process::child c(binPath, boost::process::args(paramList),
|
||||
boost::process::std_err > boost::process::null);
|
||||
|
||||
// for async calls in simulator, always delay by a fixed time, otherwise
|
||||
// the predictability of the simulator breaks
|
||||
if (!isSync && g_network->isSimulated()) {
|
||||
wait(delay(deterministicRandom()->random01()));
|
||||
}
|
||||
|
||||
if (!isSync && !g_network->isSimulated()) {
|
||||
while (c.running() && runTime <= maxWaitTime) {
|
||||
wait(delay(0.1));
|
||||
runTime += 0.1;
|
||||
}
|
||||
} else {
|
||||
if (g_network->isSimulated()) {
|
||||
// to keep the simulator deterministic, wait till the process exits,
|
||||
// hence giving a large wait time
|
||||
c.wait_for(std::chrono::hours(24));
|
||||
ASSERT(!c.running());
|
||||
} else {
|
||||
int maxWaitTimeInt = static_cast<int>(maxWaitTime + 1.0);
|
||||
c.wait_for(std::chrono::seconds(maxWaitTimeInt));
|
||||
}
|
||||
}
|
||||
|
||||
if (c.running()) {
|
||||
TraceEvent(SevWarnAlways, "ChildTermination")
|
||||
.detail("Cmd", binPath)
|
||||
.detail("Args", argsString);
|
||||
c.terminate();
|
||||
err = -1;
|
||||
if (!c.wait_for(std::chrono::seconds(1))) {
|
||||
TraceEvent(SevWarnAlways, "SpawnProcessFailedToExit")
|
||||
.detail("Cmd", binPath)
|
||||
.detail("Args", argsString);
|
||||
}
|
||||
} else {
|
||||
err = c.exit_code();
|
||||
}
|
||||
TraceEvent("SpawnProcess")
|
||||
.detail("Cmd", binPath)
|
||||
.detail("Error", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role) {
|
||||
state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid"));
|
||||
state int err = 0;
|
||||
state Future<int> cmdErr;
|
||||
if (!g_network->isSimulated()) {
|
||||
// get bin path
|
||||
auto snapBin = execArg->getBinaryPath();
|
||||
auto dataFolder = "path=" + folder;
|
||||
std::vector<std::string> paramList;
|
||||
paramList.push_back(snapBin.toString());
|
||||
// get user passed arguments
|
||||
auto listArgs = execArg->getBinaryArgs();
|
||||
for (auto elem : listArgs) {
|
||||
paramList.push_back(elem.toString());
|
||||
}
|
||||
// get additional arguments
|
||||
paramList.push_back(dataFolder);
|
||||
const char* version = FDB_VT_VERSION;
|
||||
std::string versionString = "version=";
|
||||
versionString += version;
|
||||
paramList.push_back(versionString);
|
||||
paramList.push_back(role);
|
||||
cmdErr = spawnProcess(snapBin.toString(), paramList, 3.0, false /*isSync*/);
|
||||
wait(success(cmdErr));
|
||||
err = cmdErr.get();
|
||||
} else {
|
||||
// copy the files
|
||||
state std::string folderFrom = folder + "/.";
|
||||
state std::string folderTo = folder + "-snap-" + uidStr.toString();
|
||||
std::vector<std::string> paramList;
|
||||
std::string mkdirBin = "/bin/mkdir";
|
||||
paramList.push_back(folderTo);
|
||||
cmdErr = spawnProcess(mkdirBin, paramList, 3.0, false /*isSync*/);
|
||||
wait(success(cmdErr));
|
||||
err = cmdErr.get();
|
||||
if (err == 0) {
|
||||
std::vector<std::string> paramList;
|
||||
std::string cpBin = "/bin/cp";
|
||||
paramList.push_back("-a");
|
||||
paramList.push_back(folderFrom);
|
||||
paramList.push_back(folderTo);
|
||||
cmdErr = spawnProcess(cpBin, paramList, 3.0, true /*isSync*/);
|
||||
wait(success(cmdErr));
|
||||
err = cmdErr.get();
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
std::map<NetworkAddress, std::set<UID>> execOpsInProgress;
|
||||
|
||||
bool isExecOpInProgress(UID execUID) {
|
||||
NetworkAddress addr = g_network->getLocalAddress();
|
||||
return (execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end());
|
||||
}
|
||||
|
||||
void setExecOpInProgress(UID execUID) {
|
||||
NetworkAddress addr = g_network->getLocalAddress();
|
||||
ASSERT(execOpsInProgress[addr].find(execUID) == execOpsInProgress[addr].end());
|
||||
execOpsInProgress[addr].insert(execUID);
|
||||
return;
|
||||
}
|
||||
|
||||
void clearExecOpInProgress(UID execUID) {
|
||||
NetworkAddress addr = g_network->getLocalAddress();
|
||||
ASSERT(execOpsInProgress[addr].find(execUID) != execOpsInProgress[addr].end());
|
||||
execOpsInProgress[addr].erase(execUID);
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<NetworkAddress, std::set<UID>> tLogsAlive;
|
||||
|
||||
void registerTLog(UID uid) {
|
||||
NetworkAddress addr = g_network->getLocalAddress();
|
||||
tLogsAlive[addr].insert(uid);
|
||||
}
|
||||
void unregisterTLog(UID uid) {
|
||||
NetworkAddress addr = g_network->getLocalAddress();
|
||||
if (tLogsAlive[addr].find(uid) != tLogsAlive[addr].end()) {
|
||||
tLogsAlive[addr].erase(uid);
|
||||
}
|
||||
}
|
||||
bool isTLogInSameNode() {
|
||||
NetworkAddress addr = g_network->getLocalAddress();
|
||||
return tLogsAlive[addr].size() >= 1;
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
#pragma once
|
||||
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_EXEC_HELPER_ACTOR_G_H)
|
||||
#define FDBSERVER_EXEC_HELPER_ACTOR_G_H
|
||||
#include "fdbserver/FDBExecHelper.actor.g.h"
|
||||
#elif !defined(FDBSERVER_EXEC_HELPER_ACTOR_H)
|
||||
#define FDBSERVER_EXEC_HELPER_ACTOR_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/flow.h"
|
||||
#include "flow/actorcompiler.h"
|
||||
|
||||
// execute/snapshot command takes two arguments: <param1> <param2>
|
||||
// param1 - represents the command type/name
|
||||
// param2 - takes a binary path followed by a set of arguments in the following
|
||||
// format <binary-path>:<key1=val1>,<key2=val2>...
|
||||
// this class will abstract the format and give functions to get various pieces
|
||||
// of information
|
||||
class ExecCmdValueString {
|
||||
public: // ctor & dtor
|
||||
ExecCmdValueString() {}
|
||||
explicit ExecCmdValueString(StringRef cmdValueString);
|
||||
|
||||
public: // interfaces
|
||||
StringRef getBinaryPath();
|
||||
VectorRef<StringRef> getBinaryArgs();
|
||||
StringRef getBinaryArgValue(StringRef key);
|
||||
void setCmdValueString(StringRef cmdValueString);
|
||||
StringRef getCmdValueString(void);
|
||||
|
||||
public: // helper functions
|
||||
void dbgPrint();
|
||||
|
||||
private: // functions
|
||||
void parseCmdValue();
|
||||
|
||||
private: // data
|
||||
Standalone<StringRef> cmdValueString;
|
||||
Standalone<VectorRef<StringRef>> binaryArgs;
|
||||
StringRef binaryPath;
|
||||
std::map<StringRef, StringRef> keyValueMap;
|
||||
};
|
||||
|
||||
// FIXME: move this function to a common location
|
||||
// spawns a process pointed by `binPath` and the arguments provided at `paramList`,
|
||||
// if the process spawned takes more than `maxWaitTime` then it will be killed
|
||||
// if isSync is set to true then the process will be synchronously executed
|
||||
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync);
|
||||
|
||||
// helper to run all the work related to running the exec command
|
||||
ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role);
|
||||
|
||||
// returns true if the execUID op is in progress
|
||||
bool isExecOpInProgress(UID execUID);
|
||||
// adds the execUID op to the list of ops in progress
|
||||
void setExecOpInProgress(UID execUID);
|
||||
// clears the execUID op from the list of ops in progress
|
||||
void clearExecOpInProgress(UID execUID);
|
||||
|
||||
|
||||
// registers a non-stopped TLog instance
|
||||
void registerTLog(UID uid);
|
||||
// unregisters a stopped TLog instance
|
||||
void unregisterTLog(UID uid);
|
||||
// checks if there is any non-stopped TLog instance
|
||||
bool isTLogInSameNode();
|
||||
|
||||
#endif
|
|
@ -78,6 +78,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
|
|||
init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0;
|
||||
init( TLOG_DEGRADED_DELAY_COUNT, 5 );
|
||||
init( TLOG_DEGRADED_DURATION, 5.0 );
|
||||
init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 );
|
||||
|
||||
// Data distribution queue
|
||||
init( HEALTH_POLL_TIME, 1.0 );
|
||||
|
|
|
@ -316,6 +316,7 @@ public:
|
|||
int64_t TLOG_SPILL_THRESHOLD;
|
||||
int64_t TLOG_HARD_LIMIT_BYTES;
|
||||
int64_t TLOG_RECOVER_MEMORY_LIMIT;
|
||||
double TLOG_IGNORE_POP_AUTO_ENABLE_DELAY;
|
||||
|
||||
double MAX_TRANSACTIONS_PER_BYTE;
|
||||
|
||||
|
|
|
@ -231,7 +231,8 @@ public:
|
|||
return resultEntries.size() == 0;
|
||||
}
|
||||
|
||||
void getPushLocations( std::vector<Tag> const& tags, std::vector<int>& locations, int locationOffset ) {
|
||||
void getPushLocations(std::vector<Tag> const& tags, std::vector<int>& locations, int locationOffset,
|
||||
bool allLocations = false) {
|
||||
if(locality == tagLocalitySatellite) {
|
||||
for(auto& t : tags) {
|
||||
if(t == txsTag || t.locality == tagLocalityLogRouter) {
|
||||
|
@ -248,9 +249,17 @@ public:
|
|||
alsoServers.clear();
|
||||
resultEntries.clear();
|
||||
|
||||
for(auto& t : tags) {
|
||||
if(locality == tagLocalitySpecial || t.locality == locality || t.locality < 0) {
|
||||
newLocations.push_back(bestLocationFor(t));
|
||||
if (allLocations) {
|
||||
// special handling for allLocations
|
||||
TraceEvent("AllLocationsSet");
|
||||
for (int i = 0; i < logServers.size(); i++) {
|
||||
newLocations.push_back(i);
|
||||
}
|
||||
} else {
|
||||
for (auto& t : tags) {
|
||||
if (locality == tagLocalitySpecial || t.locality == locality || t.locality < 0) {
|
||||
newLocations.push_back(bestLocationFor(t));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -690,7 +699,7 @@ struct ILogSystem {
|
|||
virtual Future<Void> onLogSystemConfigChange() = 0;
|
||||
// Returns when the log system configuration has changed due to a tlog rejoin.
|
||||
|
||||
virtual void getPushLocations( std::vector<Tag> const& tags, std::vector<int>& locations ) = 0;
|
||||
virtual void getPushLocations(std::vector<Tag> const& tags, std::vector<int>& locations, bool allLocations = false) = 0;
|
||||
|
||||
virtual bool hasRemoteLogs() = 0;
|
||||
|
||||
|
@ -733,7 +742,7 @@ struct CompareFirst {
|
|||
struct LogPushData : NonCopyable {
|
||||
// Log subsequences have to start at 1 (the MergedPeekCursor relies on this to make sure we never have !hasMessage() in the middle of data for a version
|
||||
|
||||
explicit LogPushData(Reference<ILogSystem> logSystem) : logSystem(logSystem), subsequence(1) {
|
||||
explicit LogPushData(Reference<ILogSystem> logSystem) : logSystem(logSystem), subsequence(1), hasExecOp(false) {
|
||||
for(auto& log : logSystem->getLogSystemConfig().tLogs) {
|
||||
if(log.isLocal) {
|
||||
for(int i = 0; i < log.tLogs.size(); i++) {
|
||||
|
@ -776,7 +785,7 @@ struct LogPushData : NonCopyable {
|
|||
}
|
||||
|
||||
template <class T>
|
||||
void addTypedMessage( T const& item ) {
|
||||
void addTypedMessage(T const& item, bool allLocations = false) {
|
||||
prev_tags.clear();
|
||||
if(logSystem->hasRemoteLogs()) {
|
||||
prev_tags.push_back( logSystem->getRandomRouterTag() );
|
||||
|
@ -785,8 +794,8 @@ struct LogPushData : NonCopyable {
|
|||
prev_tags.push_back(tag);
|
||||
}
|
||||
msg_locations.clear();
|
||||
logSystem->getPushLocations( prev_tags, msg_locations );
|
||||
|
||||
logSystem->getPushLocations(prev_tags, msg_locations, allLocations);
|
||||
|
||||
uint32_t subseq = this->subsequence++;
|
||||
for(int loc : msg_locations) {
|
||||
// FIXME: memcpy after the first time
|
||||
|
@ -805,6 +814,10 @@ struct LogPushData : NonCopyable {
|
|||
return messagesWriter[loc].toValue();
|
||||
}
|
||||
|
||||
void setHasExecOp() { hasExecOp = true; }
|
||||
|
||||
bool getHasExecOp() { return hasExecOp; }
|
||||
|
||||
private:
|
||||
Reference<ILogSystem> logSystem;
|
||||
std::vector<Tag> next_message_tags;
|
||||
|
@ -812,6 +825,7 @@ private:
|
|||
std::vector<BinaryWriter> messagesWriter;
|
||||
std::vector<int> msg_locations;
|
||||
uint32_t subsequence;
|
||||
bool hasExecOp;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "fdbclient/Notified.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbserver/ConflictSet.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "flow/Stats.h"
|
||||
#include "fdbserver/ApplyMetadataMutation.h"
|
||||
#include "fdbserver/RecoveryState.h"
|
||||
|
@ -41,6 +42,10 @@
|
|||
#include "fdbclient/Atomic.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
#include "fdbclient/DatabaseConfiguration.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbserver/FDBExecHelper.actor.h"
|
||||
|
||||
struct ProxyStats {
|
||||
CounterCollection cc;
|
||||
|
@ -220,6 +225,7 @@ struct ProxyCommitData {
|
|||
RequestStream<GetReadVersionRequest> getConsistentReadVersion;
|
||||
RequestStream<CommitTransactionRequest> commit;
|
||||
Database cx;
|
||||
Reference<AsyncVar<ServerDBInfo>> db;
|
||||
EventMetricHandle<SingleKeyMutation> singleKeyMutationEvent;
|
||||
|
||||
std::map<UID, Reference<StorageInfo>> storageCache;
|
||||
|
@ -227,6 +233,7 @@ struct ProxyCommitData {
|
|||
Deque<std::pair<Version, Version>> txsPopVersions;
|
||||
Version lastTxsPop;
|
||||
bool popRemoteTxs;
|
||||
vector<Standalone<StringRef>> whitelistedBinPathVec;
|
||||
|
||||
Optional<LatencyBandConfig> latencyBandConfig;
|
||||
|
||||
|
@ -256,7 +263,7 @@ struct ProxyCommitData {
|
|||
lastVersionTime(0), commitVersionRequestNumber(1), mostRecentProcessedRequestNumber(0),
|
||||
getConsistentReadVersion(getConsistentReadVersion), commit(commit), lastCoalesceTime(0),
|
||||
localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN),
|
||||
firstProxy(firstProxy), cx(openDBOnServer(db, TaskDefaultEndpoint, true, true)),
|
||||
firstProxy(firstProxy), cx(openDBOnServer(db, TaskDefaultEndpoint, true, true)), db(db),
|
||||
singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0)
|
||||
{}
|
||||
};
|
||||
|
@ -408,6 +415,34 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
|
|||
}
|
||||
}
|
||||
|
||||
void createWhitelistBinPathVec(const std::string& binPath, vector<Standalone<StringRef>>& binPathVec) {
|
||||
TraceEvent(SevDebug, "BinPathConverter").detail("Input", binPath);
|
||||
StringRef input(binPath);
|
||||
while (input != StringRef()) {
|
||||
StringRef token = input.eat(LiteralStringRef(","));
|
||||
if (token != StringRef()) {
|
||||
const uint8_t* ptr = token.begin();
|
||||
while (ptr != token.end() && *ptr == ' ') {
|
||||
ptr++;
|
||||
}
|
||||
if (ptr != token.end()) {
|
||||
Standalone<StringRef> newElement(token.substr(ptr - token.begin()));
|
||||
TraceEvent(SevDebug, "BinPathItem").detail("Element", newElement);
|
||||
binPathVec.push_back(newElement);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
bool isWhitelisted(const vector<Standalone<StringRef>>& binPathVec, StringRef binPath) {
|
||||
TraceEvent("BinPath").detail("Value", binPath);
|
||||
for (const auto& item : binPathVec) {
|
||||
TraceEvent("Element").detail("Value", item);
|
||||
}
|
||||
return std::find(binPathVec.begin(), binPathVec.end(), binPath) != binPathVec.end();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> commitBatch(
|
||||
ProxyCommitData* self,
|
||||
vector<CommitTransactionRequest> trs,
|
||||
|
@ -727,10 +762,98 @@ ACTOR Future<Void> commitBatch(
|
|||
toCommit.addTags(allSources);
|
||||
}
|
||||
toCommit.addTypedMessage(m);
|
||||
}
|
||||
else
|
||||
UNREACHABLE();
|
||||
} else if (m.type == MutationRef::Exec) {
|
||||
state std::string param2 = m.param2.toString();
|
||||
state ExecCmdValueString execArg(param2);
|
||||
execArg.dbgPrint();
|
||||
state StringRef binPath = execArg.getBinaryPath();
|
||||
state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid"));
|
||||
|
||||
auto result =
|
||||
self->txnStateStore->readValue(LiteralStringRef("log_anti_quorum").withPrefix(configKeysPrefix)).get();
|
||||
state int logAntiQuorum = 0;
|
||||
if (result.present()) {
|
||||
logAntiQuorum = atoi(result.get().toString().c_str());
|
||||
}
|
||||
|
||||
if (m.param1 != execDisableTLogPop
|
||||
&& m.param1 != execEnableTLogPop
|
||||
&& !isWhitelisted(self->whitelistedBinPathVec, binPath)) {
|
||||
TraceEvent("ExecTransactionNotPermitted")
|
||||
.detail("TransactionNum", transactionNum);
|
||||
committed[transactionNum] = ConflictBatch::TransactionNotPermitted;
|
||||
} else if (self->db->get().recoveryState != RecoveryState::FULLY_RECOVERED) {
|
||||
// Cluster is not fully recovered and needs TLogs
|
||||
// from previous generation for full recovery.
|
||||
// Currently, snapshot of old tlog generation is not
|
||||
// supported and hence failing the snapshot request until
|
||||
// cluster is fully_recovered.
|
||||
TraceEvent("ExecTransactionNotFullyRecovered")
|
||||
.detail("TransactionNum", transactionNum);
|
||||
committed[transactionNum] = ConflictBatch::TransactionNotFullyRecovered;
|
||||
} else if (logAntiQuorum > 0) {
|
||||
// exec op is not supported when logAntiQuorum is configured
|
||||
// FIXME: Add support for exec ops in the presence of log anti quorum
|
||||
TraceEvent("ExecOpNotSupportedWithLogAntiQuorum")
|
||||
.detail("LogAntiQuorum", logAntiQuorum)
|
||||
.detail("TransactionNum", transactionNum);
|
||||
committed[transactionNum] = ConflictBatch::TransactionExecLogAntiQuorum;
|
||||
} else {
|
||||
// Send the ExecOp to
|
||||
// - all the storage nodes in a single region and
|
||||
// - only to storage nodes in local region in multi-region setup
|
||||
// step 1: get the DatabaseConfiguration
|
||||
auto result =
|
||||
self->txnStateStore->readValue(LiteralStringRef("usable_regions").withPrefix(configKeysPrefix)).get();
|
||||
ASSERT(result.present());
|
||||
state int usableRegions = atoi(result.get().toString().c_str());
|
||||
|
||||
// step 2: find the tag.id from locality info of the master
|
||||
auto localityKey =
|
||||
self->txnStateStore->readValue(tagLocalityListKeyFor(self->master.locality.dcId())).get();
|
||||
|
||||
int8_t locality = tagLocalityInvalid;
|
||||
if (usableRegions > 1) {
|
||||
if (!localityKey.present()) {
|
||||
TraceEvent(SevError, "LocalityKeyNotPresentForMasterDCID");
|
||||
ASSERT(localityKey.present());
|
||||
}
|
||||
locality = decodeTagLocalityListValue(localityKey.get());
|
||||
}
|
||||
|
||||
std::set<Tag> allSources;
|
||||
auto& m = (*pMutations)[mutationNum];
|
||||
if (debugMutation("ProxyCommit", commitVersion, m))
|
||||
TraceEvent("ProxyCommitTo", self->dbgid)
|
||||
.detail("To", "all sources")
|
||||
.detail("Mutation", m.toString())
|
||||
.detail("Version", commitVersion);
|
||||
|
||||
std::vector<Tag> localTags;
|
||||
auto tagKeys = self->txnStateStore->readRange(serverTagKeys).get();
|
||||
for( auto& kv : tagKeys ) {
|
||||
Tag t = decodeServerTagValue( kv.value );
|
||||
if ((usableRegions > 1 && t.locality == locality)
|
||||
|| (usableRegions == 1)) {
|
||||
localTags.push_back(t);
|
||||
}
|
||||
allSources.insert(localTags.begin(), localTags.end());
|
||||
}
|
||||
|
||||
auto te1 = TraceEvent("ProxyCommitTo", self->dbgid);
|
||||
te1.detail("To", "all sources");
|
||||
te1.detail("UidStr", uidStr);
|
||||
te1.detail("Mutation", m.toString());
|
||||
te1.detail("Version", commitVersion);
|
||||
te1.detail("NumTags", allSources.size());
|
||||
for (auto& tag : allSources) {
|
||||
toCommit.addTag(tag);
|
||||
}
|
||||
toCommit.addTypedMessage(m, true /* allLocations */);
|
||||
toCommit.setHasExecOp();
|
||||
}
|
||||
} else
|
||||
UNREACHABLE();
|
||||
|
||||
|
||||
// Check on backing up key, if backup ranges are defined and a normal key
|
||||
|
@ -948,7 +1071,15 @@ ACTOR Future<Void> commitBatch(
|
|||
else if (committed[t] == ConflictBatch::TransactionTooOld) {
|
||||
trs[t].reply.sendError(transaction_too_old());
|
||||
}
|
||||
else {
|
||||
else if (committed[t] == ConflictBatch::TransactionNotPermitted) {
|
||||
trs[t].reply.sendError(transaction_not_permitted());
|
||||
}
|
||||
else if (committed[t] == ConflictBatch::TransactionNotFullyRecovered) {
|
||||
trs[t].reply.sendError(cluster_not_fully_recovered());
|
||||
}
|
||||
else if (committed[t] == ConflictBatch::TransactionExecLogAntiQuorum) {
|
||||
trs[t].reply.sendError(txn_exec_log_anti_quorum());
|
||||
} else {
|
||||
trs[t].reply.sendError(not_committed());
|
||||
}
|
||||
|
||||
|
@ -1329,12 +1460,12 @@ ACTOR Future<Void> healthMetricsRequestServer(MasterProxyInterface proxy, GetHea
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> monitorRemoteCommitted(ProxyCommitData* self, Reference<AsyncVar<ServerDBInfo>> db) {
|
||||
ACTOR Future<Void> monitorRemoteCommitted(ProxyCommitData* self) {
|
||||
loop {
|
||||
wait(delay(0)); //allow this actor to be cancelled if we are removed after db changes.
|
||||
state Optional<std::vector<OptionalInterface<TLogInterface>>> remoteLogs;
|
||||
if(db->get().recoveryState >= RecoveryState::ALL_LOGS_RECRUITED) {
|
||||
for(auto& logSet : db->get().logSystemConfig.tLogs) {
|
||||
if(self->db->get().recoveryState >= RecoveryState::ALL_LOGS_RECRUITED) {
|
||||
for(auto& logSet : self->db->get().logSystemConfig.tLogs) {
|
||||
if(!logSet.isLocal) {
|
||||
remoteLogs = logSet.tLogs;
|
||||
for(auto& tLog : logSet.tLogs) {
|
||||
|
@ -1349,12 +1480,12 @@ ACTOR Future<Void> monitorRemoteCommitted(ProxyCommitData* self, Reference<Async
|
|||
}
|
||||
|
||||
if(!remoteLogs.present()) {
|
||||
wait(db->onChange());
|
||||
wait(self->db->onChange());
|
||||
continue;
|
||||
}
|
||||
self->popRemoteTxs = true;
|
||||
|
||||
state Future<Void> onChange = db->onChange();
|
||||
state Future<Void> onChange = self->db->onChange();
|
||||
loop {
|
||||
state std::vector<Future<TLogQueuingMetricsReply>> replies;
|
||||
for(auto &it : remoteLogs.get()) {
|
||||
|
@ -1392,7 +1523,8 @@ ACTOR Future<Void> masterProxyServerCore(
|
|||
Reference<AsyncVar<ServerDBInfo>> db,
|
||||
LogEpoch epoch,
|
||||
Version recoveryTransactionVersion,
|
||||
bool firstProxy)
|
||||
bool firstProxy,
|
||||
std::string whitelistBinPaths)
|
||||
{
|
||||
state ProxyCommitData commitData(proxy.id(), master, proxy.getConsistentReadVersion, recoveryTransactionVersion, proxy.commit, db, firstProxy);
|
||||
|
||||
|
@ -1416,31 +1548,32 @@ ACTOR Future<Void> masterProxyServerCore(
|
|||
//TraceEvent("ProxyInit1", proxy.id());
|
||||
|
||||
// Wait until we can load the "real" logsystem, since we don't support switching them currently
|
||||
while (!(db->get().master.id() == master.id() && db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION)) {
|
||||
while (!(commitData.db->get().master.id() == master.id() && commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION)) {
|
||||
//TraceEvent("ProxyInit2", proxy.id()).detail("LSEpoch", db->get().logSystemConfig.epoch).detail("Need", epoch);
|
||||
wait(db->onChange());
|
||||
wait(commitData.db->onChange());
|
||||
}
|
||||
state Future<Void> dbInfoChange = db->onChange();
|
||||
state Future<Void> dbInfoChange = commitData.db->onChange();
|
||||
//TraceEvent("ProxyInit3", proxy.id());
|
||||
|
||||
commitData.resolvers = db->get().resolvers;
|
||||
commitData.resolvers = commitData.db->get().resolvers;
|
||||
ASSERT(commitData.resolvers.size() != 0);
|
||||
|
||||
auto rs = commitData.keyResolvers.modify(allKeys);
|
||||
for(auto r = rs.begin(); r != rs.end(); ++r)
|
||||
r->value().emplace_back(0,0);
|
||||
|
||||
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get(), false, addActor);
|
||||
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor);
|
||||
commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, txsTag, Reference<AsyncVar<PeekSpecialInfo>>(), false);
|
||||
commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter, proxy.id(), 2e9, true, true, true);
|
||||
createWhitelistBinPathVec(whitelistBinPaths, commitData.whitelistedBinPathVec);
|
||||
|
||||
// ((SERVER_MEM_LIMIT * COMMIT_BATCHES_MEM_FRACTION_OF_TOTAL) / COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR) is only a approximate formula for limiting the memory used.
|
||||
// COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR is an estimate based on experiments and not an accurate one.
|
||||
state int64_t commitBatchesMemoryLimit = std::min(SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT, static_cast<int64_t>((SERVER_KNOBS->SERVER_MEM_LIMIT * SERVER_KNOBS->COMMIT_BATCHES_MEM_FRACTION_OF_TOTAL) / SERVER_KNOBS->COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR));
|
||||
TraceEvent(SevInfo, "CommitBatchesMemoryLimit").detail("BytesLimit", commitBatchesMemoryLimit);
|
||||
|
||||
addActor.send(monitorRemoteCommitted(&commitData, db));
|
||||
addActor.send(transactionStarter(proxy, db, addActor, &commitData, &healthMetricsReply, &detailedHealthMetricsReply));
|
||||
addActor.send(monitorRemoteCommitted(&commitData));
|
||||
addActor.send(transactionStarter(proxy, commitData.db, addActor, &commitData, &healthMetricsReply, &detailedHealthMetricsReply));
|
||||
addActor.send(readRequestServer(proxy, &commitData));
|
||||
addActor.send(rejoinServer(proxy, &commitData));
|
||||
addActor.send(healthMetricsRequestServer(proxy, &healthMetricsReply, &detailedHealthMetricsReply));
|
||||
|
@ -1451,21 +1584,21 @@ ACTOR Future<Void> masterProxyServerCore(
|
|||
int commitBatchByteLimit =
|
||||
(int)std::min<double>(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_MAX,
|
||||
std::max<double>(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_MIN,
|
||||
SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE * pow(db->get().client.proxies.size(), SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER)));
|
||||
SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE * pow(commitData.db->get().client.proxies.size(), SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER)));
|
||||
|
||||
commitBatcherActor = commitBatcher(&commitData, batchedCommits, proxy.commit.getFuture(), commitBatchByteLimit, commitBatchesMemoryLimit);
|
||||
loop choose{
|
||||
when( wait( dbInfoChange ) ) {
|
||||
dbInfoChange = db->onChange();
|
||||
if(db->get().master.id() == master.id() && db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION) {
|
||||
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), db->get(), false, addActor);
|
||||
dbInfoChange = commitData.db->onChange();
|
||||
if(commitData.db->get().master.id() == master.id() && commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION) {
|
||||
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor);
|
||||
for(auto it : commitData.tag_popped) {
|
||||
commitData.logSystem->pop(it.second, it.first);
|
||||
}
|
||||
commitData.logSystem->pop(commitData.lastTxsPop, txsTag, 0, tagLocalityRemoteLog);
|
||||
}
|
||||
|
||||
Optional<LatencyBandConfig> newLatencyBandConfig = db->get().latencyBandConfig;
|
||||
Optional<LatencyBandConfig> newLatencyBandConfig = commitData.db->get().latencyBandConfig;
|
||||
|
||||
if(newLatencyBandConfig.present() != commitData.latencyBandConfig.present()
|
||||
|| (newLatencyBandConfig.present() && newLatencyBandConfig.get().grvConfig != commitData.latencyBandConfig.get().grvConfig))
|
||||
|
@ -1498,7 +1631,7 @@ ACTOR Future<Void> masterProxyServerCore(
|
|||
const vector<CommitTransactionRequest> &trs = batchedRequests.first;
|
||||
int batchBytes = batchedRequests.second;
|
||||
//TraceEvent("MasterProxyCTR", proxy.id()).detail("CommitTransactions", trs.size()).detail("TransactionRate", transactionRate).detail("TransactionQueue", transactionQueue.size()).detail("ReleasedTransactionCount", transactionCount);
|
||||
if (trs.size() || (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS && now() - lastCommit >= SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL)) {
|
||||
if (trs.size() || (commitData.db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS && now() - lastCommit >= SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL)) {
|
||||
lastCommit = now();
|
||||
|
||||
if (trs.size() || lastCommitComplete.isReady()) {
|
||||
|
@ -1517,6 +1650,63 @@ ACTOR Future<Void> masterProxyServerCore(
|
|||
rep.version = commitData.committedVersion.get();
|
||||
req.reply.send(rep);
|
||||
}
|
||||
when(ExecRequest _execReq = waitNext(proxy.execReq.getFuture())) {
|
||||
state ExecRequest execReq = _execReq;
|
||||
if (execReq.debugID.present())
|
||||
g_traceBatch.addEvent("TransactionDebug", execReq.debugID.get().first(),
|
||||
"MasterProxyServer.masterProxyServerCore."
|
||||
"ExecRequest");
|
||||
|
||||
TraceEvent("ExecRequest").detail("Payload", execReq.execPayload.toString());
|
||||
|
||||
// get the list of coordinators
|
||||
state Optional<Value> coordinators = commitData.txnStateStore->readValue(coordinatorsKey).get();
|
||||
state std::vector<NetworkAddress> coordinatorsAddr =
|
||||
ClusterConnectionString(coordinators.get().toString()).coordinators();
|
||||
state std::set<NetworkAddress> coordinatorsAddrSet;
|
||||
for (int i = 0; i < coordinatorsAddr.size(); i++) {
|
||||
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", coordinatorsAddr[i]);
|
||||
coordinatorsAddrSet.insert(coordinatorsAddr[i]);
|
||||
}
|
||||
|
||||
// get the list of workers
|
||||
state std::vector<WorkerDetails> workers =
|
||||
wait(commitData.db->get().clusterInterface.getWorkers.getReply(GetWorkersRequest()));
|
||||
|
||||
// send the exec command to the list of workers which are
|
||||
// coordinators
|
||||
state vector<Future<Void>> execCoords;
|
||||
for (int i = 0; i < workers.size(); i++) {
|
||||
NetworkAddress primary = workers[i].interf.address();
|
||||
Optional<NetworkAddress> secondary = workers[i].interf.tLog.getEndpoint().addresses.secondaryAddress;
|
||||
if (coordinatorsAddrSet.find(primary) != coordinatorsAddrSet.end()
|
||||
|| (secondary.present() && (coordinatorsAddrSet.find(secondary.get()) != coordinatorsAddrSet.end()))) {
|
||||
TraceEvent("ExecReqToCoordinator")
|
||||
.detail("PrimaryWorkerAddr", primary)
|
||||
.detail("SecondaryWorkerAddr", secondary);
|
||||
execCoords.push_back(brokenPromiseToNever(workers[i].interf.execReq.getReply(ExecuteRequest(execReq.execPayload))));
|
||||
}
|
||||
}
|
||||
if (execCoords.size() <= 0) {
|
||||
TraceEvent(SevDebug, "CoordinatorWorkersNotFound");
|
||||
execReq.reply.sendError(operation_failed());
|
||||
} else {
|
||||
try {
|
||||
wait(timeoutError(waitForAll(execCoords), 10.0));
|
||||
int numSucc = 0;
|
||||
for (auto item : execCoords) {
|
||||
if (item.isValid() && item.isReady()) {
|
||||
++numSucc;
|
||||
}
|
||||
}
|
||||
bool succ = (numSucc >= ((execCoords.size() + 1) / 2));
|
||||
succ ? execReq.reply.send(Void()) : execReq.reply.sendError(operation_failed());
|
||||
} catch (Error& e) {
|
||||
TraceEvent("WaitingForAllExecCoords").error(e);
|
||||
execReq.reply.sendError(broken_promise());
|
||||
}
|
||||
}
|
||||
}
|
||||
when(TxnStateRequest req = waitNext(proxy.txnState.getFuture())) {
|
||||
state ReplyPromise<Void> reply = req.reply;
|
||||
if(req.last) maxSequence = req.sequence + 1;
|
||||
|
@ -1601,10 +1791,11 @@ ACTOR Future<Void> checkRemoved(Reference<AsyncVar<ServerDBInfo>> db, uint64_t r
|
|||
ACTOR Future<Void> masterProxyServer(
|
||||
MasterProxyInterface proxy,
|
||||
InitializeMasterProxyRequest req,
|
||||
Reference<AsyncVar<ServerDBInfo>> db)
|
||||
Reference<AsyncVar<ServerDBInfo>> db,
|
||||
std::string whitelistBinPaths)
|
||||
{
|
||||
try {
|
||||
state Future<Void> core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy);
|
||||
state Future<Void> core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whitelistBinPaths);
|
||||
loop choose{
|
||||
when(wait(core)) { return Void(); }
|
||||
when(wait(checkRemoved(db, req.recoveryCount, proxy))) {}
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/Notified.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbclient/RunTransaction.actor.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "fdbserver/TLogInterface.h"
|
||||
|
@ -38,6 +39,7 @@
|
|||
#include "fdbserver/LogSystem.h"
|
||||
#include "fdbserver/WaitFailure.h"
|
||||
#include "fdbserver/RecoveryState.h"
|
||||
#include "fdbserver/FDBExecHelper.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
using std::pair;
|
||||
|
@ -251,6 +253,7 @@ struct TLogData : NonCopyable {
|
|||
AsyncVar<bool> largeDiskQueueCommitBytes; //becomes true when diskQueueCommitBytes is greater than MAX_QUEUE_COMMIT_BYTES
|
||||
|
||||
Reference<AsyncVar<ServerDBInfo>> dbInfo;
|
||||
Database cx;
|
||||
|
||||
NotifiedVersion queueCommitEnd;
|
||||
Version queueCommitBegin;
|
||||
|
@ -274,15 +277,27 @@ struct TLogData : NonCopyable {
|
|||
FlowLock concurrentLogRouterReads;
|
||||
FlowLock persistentDataCommitLock;
|
||||
|
||||
bool ignorePopRequest; // ignore pop request from storage servers
|
||||
double ignorePopDeadline; // time until which the ignorePopRequest will be
|
||||
// honored
|
||||
std::string ignorePopUid; // callers that set ignorePopRequest will set this
|
||||
// extra state, used to validate the ownership of
|
||||
// the set and for callers that unset will
|
||||
// be able to match it up
|
||||
std::string dataFolder; // folder where data is stored
|
||||
std::map<Tag, Version> toBePopped; // map of Tag->Version for all the pops
|
||||
// that came when ignorePopRequest was set
|
||||
Reference<AsyncVar<bool>> degraded;
|
||||
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded)
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
|
||||
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
|
||||
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), overheadBytesInput(0), overheadBytesDurable(0),
|
||||
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS)
|
||||
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
|
||||
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
|
||||
{
|
||||
cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -416,13 +431,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
UID recruitmentID;
|
||||
std::set<Tag> allTags;
|
||||
Future<Void> terminated;
|
||||
FlowLock execOpLock;
|
||||
bool execOpCommitInProgress;
|
||||
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID),
|
||||
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
|
||||
// These are initialized differently on init() or recovery
|
||||
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid)
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
|
||||
{
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), UID());
|
||||
|
||||
|
@ -519,6 +536,7 @@ ACTOR Future<Void> tLogLock( TLogData* self, ReplyPromise< TLogLockResult > repl
|
|||
TEST( !logData->stopped );
|
||||
|
||||
TraceEvent("TLogStop", logData->logId).detail("Ver", stopVersion).detail("IsStopped", logData->stopped).detail("QueueCommitted", logData->queueCommittedVersion.get());
|
||||
unregisterTLog(logData->logId);
|
||||
|
||||
logData->stopped = true;
|
||||
if(!logData->recoveryComplete.isSet()) {
|
||||
|
@ -886,14 +904,28 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>> & getVersionMessages( Re
|
|||
return tagData->versionMessages;
|
||||
};
|
||||
|
||||
ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogData> logData ) {
|
||||
state Version upTo = req.to;
|
||||
int8_t tagLocality = req.tag.locality;
|
||||
ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
|
||||
if (self->ignorePopRequest && inputTag != txsTag) {
|
||||
TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
|
||||
|
||||
if (self->toBePopped.find(inputTag) == self->toBePopped.end()
|
||||
|| to > self->toBePopped[inputTag]) {
|
||||
self->toBePopped[inputTag] = to;
|
||||
}
|
||||
// add the pop to the toBePopped map
|
||||
TraceEvent(SevDebug, "IgnoringPopRequest")
|
||||
.detail("IgnorePopDeadline", self->ignorePopDeadline)
|
||||
.detail("Tag", inputTag.toString())
|
||||
.detail("Version", to);
|
||||
return Void();
|
||||
}
|
||||
state Version upTo = to;
|
||||
int8_t tagLocality = inputTag.locality;
|
||||
if (logData->logSystem->get().isValid() && logData->logSystem->get()->isPseudoLocality(tagLocality)) {
|
||||
upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, req.to);
|
||||
upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, to);
|
||||
tagLocality = tagLocalityLogRouter;
|
||||
}
|
||||
state Tag tag(tagLocality, req.tag.id);
|
||||
state Tag tag(tagLocality, inputTag.id);
|
||||
auto tagData = logData->getTagData(tag);
|
||||
if (!tagData) {
|
||||
tagData = logData->createTagData(tag, upTo, true, true, false);
|
||||
|
@ -914,7 +946,34 @@ ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogDat
|
|||
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskTLogPop));
|
||||
//TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogData> logData ) {
|
||||
// timeout check for ignorePopRequest
|
||||
if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) {
|
||||
|
||||
TraceEvent("EnableTLogPlayAllIgnoredPops");
|
||||
// use toBePopped and issue all the pops
|
||||
state std::map<Tag, Version>::iterator it;
|
||||
state vector<Future<Void>> ignoredPops;
|
||||
self->ignorePopRequest = false;
|
||||
self->ignorePopUid = "";
|
||||
self->ignorePopDeadline = 0.0;
|
||||
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
|
||||
TraceEvent("PlayIgnoredPop")
|
||||
.detail("Tag", it->first.toString())
|
||||
.detail("Version", it->second);
|
||||
ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
|
||||
}
|
||||
self->toBePopped.clear();
|
||||
wait(waitForAll(ignoredPops));
|
||||
TraceEvent("ResetIgnorePopRequest")
|
||||
.detail("Now", g_network->now())
|
||||
.detail("IgnorePopRequest", self->ignorePopRequest)
|
||||
.detail("IgnorePopDeadline", self->ignorePopDeadline);
|
||||
}
|
||||
wait(tLogPopCore(self, req.tag, req.to, logData));
|
||||
req.reply.send(Void());
|
||||
return Void();
|
||||
}
|
||||
|
@ -1218,6 +1277,207 @@ ACTOR Future<Void> commitQueue( TLogData* self ) {
|
|||
}
|
||||
}
|
||||
|
||||
void execProcessingHelper(TLogData* self,
|
||||
Reference<LogData> logData,
|
||||
TLogCommitRequest* req,
|
||||
Standalone<VectorRef<Tag>>* execTags,
|
||||
ExecCmdValueString* execArg,
|
||||
StringRef* execCmd,
|
||||
Version* execVersion,
|
||||
vector<Future<Void>>* snapFailKeySetters,
|
||||
vector<Future<Void>>* ignoredPops)
|
||||
{
|
||||
// inspect the messages to find if there is an Exec type and print
|
||||
// it. message are prefixed by the length of the message and each
|
||||
// field is prefixed by the length too
|
||||
uint8_t type = MutationRef::MAX_ATOMIC_OP;
|
||||
StringRef param2;
|
||||
ArenaReader rd(req->arena, req->messages, Unversioned());
|
||||
int32_t messageLength, rawLength;
|
||||
uint16_t tagCount;
|
||||
uint32_t sub;
|
||||
while (!rd.empty()) {
|
||||
Tag tmpTag;
|
||||
bool hasTxsTag = false;
|
||||
rd.checkpoint();
|
||||
rd >> messageLength >> sub >> tagCount;
|
||||
for (int i = 0; i < tagCount; i++) {
|
||||
rd >> tmpTag;
|
||||
if (tmpTag == txsTag) {
|
||||
hasTxsTag = true;
|
||||
}
|
||||
execTags->push_back(execTags->arena(), tmpTag);
|
||||
}
|
||||
if (!hasTxsTag) {
|
||||
rd >> type;
|
||||
if (type == MutationRef::Exec) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
rawLength = messageLength + sizeof(messageLength);
|
||||
rd.rewind();
|
||||
rd.readBytes(rawLength);
|
||||
}
|
||||
|
||||
int32_t len = 0;
|
||||
if (type == MutationRef::Exec) {
|
||||
// get param1
|
||||
rd >> len;
|
||||
*execCmd = StringRef((uint8_t const*)rd.readBytes(len), len);
|
||||
// get param2
|
||||
rd >> len;
|
||||
param2 = StringRef((uint8_t const*)rd.readBytes(len), len);
|
||||
|
||||
TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid)
|
||||
.detail("Value", execCmd->toString())
|
||||
.detail("Version", req->version);
|
||||
|
||||
execArg->setCmdValueString(param2);
|
||||
execArg->dbgPrint();
|
||||
StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid"));
|
||||
if (!execCmd->startsWith(LiteralStringRef("\xff"))) {
|
||||
*execVersion = req->version;
|
||||
}
|
||||
if (*execCmd == execSnap) {
|
||||
// validation check specific to snap request
|
||||
std::string reason;
|
||||
if (!self->ignorePopRequest) {
|
||||
*execVersion = invalidVersion;
|
||||
reason = "SnapFailIgnorePopNotSet";
|
||||
} else if (uidStr.toString() != self->ignorePopUid) {
|
||||
*execVersion = invalidVersion;
|
||||
reason = "SnapFailedDisableTLogUidMismatch";
|
||||
}
|
||||
|
||||
if (*execVersion == invalidVersion) {
|
||||
TraceEvent(SevWarn, "TLogSnapFailed")
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("IgnorePopRequest", self->ignorePopRequest)
|
||||
.detail("Reason", reason)
|
||||
.detail("Version", req->version);
|
||||
|
||||
TraceEvent("ExecCmdSnapCreate")
|
||||
.detail("Uid", uidStr.toString())
|
||||
.detail("Status", -1)
|
||||
.detail("Tag", logData->allTags.begin()->toString())
|
||||
.detail("Role", "TLog")
|
||||
.detail("Version", req->version);
|
||||
if (g_network->isSimulated()) {
|
||||
// write SnapFailedTLog.$UID
|
||||
Standalone<StringRef> keyStr = snapTestFailStatus.withSuffix(uidStr);
|
||||
Standalone<StringRef> valStr = LiteralStringRef("Success");
|
||||
TraceEvent(SevDebug, "TLogKeyStr").detail("Value", keyStr);
|
||||
snapFailKeySetters->push_back(runRYWTransaction(self->cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void>
|
||||
{ tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->set(keyStr, valStr); return Void(); }));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (*execCmd == execDisableTLogPop) {
|
||||
self->ignorePopRequest = true;
|
||||
if (self->ignorePopUid != "") {
|
||||
TraceEvent(SevWarn, "TLogPopDisableonDisable")
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
self->ignorePopUid = uidStr.toString();
|
||||
self->ignorePopDeadline = g_network->now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY;
|
||||
TraceEvent("TLogExecCmdPopDisable")
|
||||
.detail("ExecCmd", execCmd->toString())
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("IgnporePopRequest", self->ignorePopRequest)
|
||||
.detail("IgnporePopDeadline", self->ignorePopDeadline)
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
if (*execCmd == execEnableTLogPop) {
|
||||
if (self->ignorePopUid != uidStr.toString()) {
|
||||
TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch")
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
|
||||
TraceEvent("EnableTLogPlayAllIgnoredPops2");
|
||||
// use toBePopped and issue all the pops
|
||||
std::map<Tag, Version>::iterator it;
|
||||
self->ignorePopRequest = false;
|
||||
self->ignorePopDeadline = 0.0;
|
||||
self->ignorePopUid = "";
|
||||
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
|
||||
TraceEvent("PlayIgnoredPop")
|
||||
.detail("Tag", it->first.toString())
|
||||
.detail("Version", it->second);
|
||||
ignoredPops->push_back(tLogPopCore(self, it->first, it->second, logData));
|
||||
}
|
||||
self->toBePopped.clear();
|
||||
TraceEvent("TLogExecCmdPopEnable")
|
||||
.detail("ExecCmd", execCmd->toString())
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("IgnporePopRequest", self->ignorePopRequest)
|
||||
.detail("IgnporePopDeadline", self->ignorePopDeadline)
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ACTOR Future<Void> tLogSnapHelper(TLogData* self,
|
||||
Reference<LogData> logData,
|
||||
ExecCmdValueString* execArg,
|
||||
Version version,
|
||||
Version execVersion,
|
||||
StringRef execCmd,
|
||||
Standalone<VectorRef<Tag>> execTags)
|
||||
{
|
||||
state int err = 0;
|
||||
state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid"));
|
||||
state UID execUID = UID::fromString(uidStr.toString());
|
||||
state bool otherRoleExeced = false;
|
||||
// TLog is special, we need to snap at the execVersion.
|
||||
// storage on the same node should not initiate a snap before TLog which will make
|
||||
// the snap version at TLog unpredictable
|
||||
ASSERT(!isExecOpInProgress(execUID));
|
||||
if (!otherRoleExeced) {
|
||||
setExecOpInProgress(execUID);
|
||||
int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog"));
|
||||
err = tmpErr;
|
||||
clearExecOpInProgress(execUID);
|
||||
}
|
||||
TraceEvent("TLogCommitExecTraceTLog")
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("Status", err)
|
||||
.detail("Tag", logData->allTags.begin()->toString())
|
||||
.detail("OldTagSize", logData->allTags.size())
|
||||
.detail("Role", "TLog");
|
||||
|
||||
// print the detailed status message
|
||||
for (int i = 0; i < execTags.size(); i++) {
|
||||
Version poppedTagVersion = -1;
|
||||
auto tagv = logData->getTagData(execTags[i]);
|
||||
if (!tagv) {
|
||||
continue;
|
||||
}
|
||||
poppedTagVersion = tagv->popped;
|
||||
|
||||
TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed");
|
||||
te.detail("Uid", uidStr.toString());
|
||||
te.detail("Status", err);
|
||||
te.detail("Role", "TLog");
|
||||
te.detail("ExecCmd", execCmd.toString());
|
||||
te.detail("Param2", execArg->getCmdValueString().toString());
|
||||
te.detail("Tag", tagv->tag.toString());
|
||||
te.detail("Version", version);
|
||||
te.detail("PoppedTagVersion", poppedTagVersion);
|
||||
te.detail("PersistentDataVersion", logData->persistentDataVersion);
|
||||
te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion);
|
||||
te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get());
|
||||
te.detail("IgnorePopUid", self->ignorePopUid);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> tLogCommit(
|
||||
TLogData* self,
|
||||
TLogCommitRequest req,
|
||||
|
@ -1252,22 +1512,59 @@ ACTOR Future<Void> tLogCommit(
|
|||
wait( delayJittered(.005, TaskTLogCommit) );
|
||||
}
|
||||
|
||||
// while exec op is being committed, no new transactions will be admitted.
|
||||
// This property is useful for snapshot kind of operations which wants to
|
||||
// take a snap of the disk image at a particular version (no data from
|
||||
// future version to be included)
|
||||
// NOTE: execOpCommitInProgress will not be set for exec commands which
|
||||
// start with \xff
|
||||
state bool execOpLockTaken = false;
|
||||
if (logData->execOpCommitInProgress) {
|
||||
wait(logData->execOpLock.take());
|
||||
execOpLockTaken = true;
|
||||
}
|
||||
|
||||
if(logData->stopped) {
|
||||
req.reply.sendError( tlog_stopped() );
|
||||
return Void();
|
||||
}
|
||||
|
||||
if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!)
|
||||
state Version execVersion = invalidVersion;
|
||||
state ExecCmdValueString execArg();
|
||||
state TLogQueueEntryRef qe;
|
||||
state StringRef execCmd;
|
||||
state Standalone<VectorRef<Tag>> execTags;
|
||||
state vector<Future<Void>> snapFailKeySetters;
|
||||
state vector<Future<Void>> playIgnoredPops;
|
||||
|
||||
if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!)
|
||||
if(req.debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before");
|
||||
|
||||
if (req.hasExecOp) {
|
||||
execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops);
|
||||
if (execVersion != invalidVersion) {
|
||||
TraceEvent(SevDebug, "SettingExecOpCommit")
|
||||
.detail("LogId", logData->logId)
|
||||
.detail("ExecVersion", execVersion)
|
||||
.detail("Version", req.version);
|
||||
logData->execOpCommitInProgress = true;
|
||||
if (!execOpLockTaken) {
|
||||
wait(logData->execOpLock.take());
|
||||
execOpLockTaken = true;
|
||||
} else {
|
||||
ASSERT(logData->execOpLock.available() == 0);
|
||||
}
|
||||
ASSERT(execOpLockTaken);
|
||||
}
|
||||
}
|
||||
|
||||
//TraceEvent("TLogCommit", logData->logId).detail("Version", req.version);
|
||||
commitMessages(self, logData, req.version, req.arena, req.messages);
|
||||
|
||||
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion);
|
||||
|
||||
// Log the changes to the persistent queue, to be committed by commitQueue()
|
||||
TLogQueueEntryRef qe;
|
||||
qe.version = req.version;
|
||||
qe.knownCommittedVersion = logData->knownCommittedVersion;
|
||||
qe.messages = req.messages;
|
||||
|
@ -1281,6 +1578,7 @@ ACTOR Future<Void> tLogCommit(
|
|||
|
||||
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
|
||||
logData->version.set( req.version );
|
||||
wait(waitForAll(playIgnoredPops));
|
||||
|
||||
if(req.debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.AfterTLogCommit");
|
||||
|
@ -1289,6 +1587,19 @@ ACTOR Future<Void> tLogCommit(
|
|||
state Future<Void> stopped = logData->stopCommit.onTrigger();
|
||||
wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) );
|
||||
|
||||
if ((execVersion != invalidVersion) && execVersion <= logData->queueCommittedVersion.get()) {
|
||||
wait(tLogSnapHelper(self, logData, &execArg, qe.version, execVersion, execCmd, execTags));
|
||||
}
|
||||
if (execVersion != invalidVersion && logData->execOpCommitInProgress) {
|
||||
ASSERT(execOpLockTaken);
|
||||
logData->execOpCommitInProgress = false;
|
||||
}
|
||||
if (execOpLockTaken) {
|
||||
logData->execOpLock.release();
|
||||
execOpLockTaken = false;
|
||||
}
|
||||
execVersion = invalidVersion;
|
||||
|
||||
if(stopped.isReady()) {
|
||||
ASSERT(logData->stopped);
|
||||
req.reply.sendError( tlog_stopped() );
|
||||
|
@ -1299,6 +1610,13 @@ ACTOR Future<Void> tLogCommit(
|
|||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
|
||||
|
||||
req.reply.send( logData->durableKnownCommittedVersion );
|
||||
if (g_network->isSimulated()) {
|
||||
if (snapFailKeySetters.size() > 0) {
|
||||
TraceEvent(SevDebug, "SettingSnapFailKey");
|
||||
wait(waitForAll(snapFailKeySetters));
|
||||
TraceEvent(SevDebug, "SettingSnapFailKeyDone");
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -1473,7 +1791,7 @@ ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Refere
|
|||
logData->addActor.send( tLogPeekMessages( self, req, logData ) );
|
||||
}
|
||||
when( TLogPopRequest req = waitNext( tli.popMessages.getFuture() ) ) {
|
||||
logData->addActor.send( tLogPop( self, req, logData ) );
|
||||
logData->addActor.send(tLogPop(self, req, logData));
|
||||
}
|
||||
when( TLogCommitRequest req = waitNext( tli.commit.getFuture() ) ) {
|
||||
//TraceEvent("TLogCommitReq", logData->logId).detail("Ver", req.version).detail("PrevVer", req.prevVersion).detail("LogVer", logData->version.get());
|
||||
|
@ -1507,6 +1825,7 @@ ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Refere
|
|||
void removeLog( TLogData* self, Reference<LogData> logData ) {
|
||||
TraceEvent("TLogRemoved", logData->logId).detail("Input", logData->bytesInput.getValue()).detail("Durable", logData->bytesDurable.getValue());
|
||||
logData->stopped = true;
|
||||
unregisterTLog(logData->logId);
|
||||
if(!logData->recoveryComplete.isSet()) {
|
||||
logData->recoveryComplete.sendError(end_of_stream());
|
||||
}
|
||||
|
@ -1993,6 +2312,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
self->queueOrder.push_back(recruited.id());
|
||||
|
||||
TraceEvent("TLogStart", logData->logId);
|
||||
registerTLog(logData->logId);
|
||||
state Future<Void> updater;
|
||||
state bool pulledRecoveryVersions = false;
|
||||
try {
|
||||
|
@ -2098,8 +2418,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
}
|
||||
|
||||
// New tLog (if !recoverFrom.size()) or restore from network
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, Reference<AsyncVar<bool>> degraded) {
|
||||
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded );
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded) {
|
||||
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder );
|
||||
state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
|
||||
|
||||
TraceEvent("SharedTlog", tlogId);
|
||||
|
|
|
@ -196,7 +196,8 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
|
|||
LocalityData localities, ProcessClass processClass,
|
||||
std::string* dataFolder, std::string* coordFolder,
|
||||
std::string baseFolder, ClusterConnectionString connStr,
|
||||
bool useSeedFile, bool runBackupAgents) {
|
||||
bool useSeedFile, bool runBackupAgents,
|
||||
std::string whitelistBinPaths) {
|
||||
state ISimulator::ProcessInfo *simProcess = g_simulator.getCurrentProcess();
|
||||
state UID randomId = nondeterministicRandom()->randomUniqueID();
|
||||
state int cycles = 0;
|
||||
|
@ -250,7 +251,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
|
|||
NetworkAddress n(ip, listenPort, true, sslEnabled && listenPort == port);
|
||||
futures.push_back(FlowTransport::transport().bind( n, n ));
|
||||
}
|
||||
Future<Void> fd = fdbd( connFile, localities, processClass, *dataFolder, *coordFolder, 500e6, "", "", -1);
|
||||
Future<Void> fd = fdbd( connFile, localities, processClass, *dataFolder, *coordFolder, 500e6, "", "", -1, whitelistBinPaths);
|
||||
Future<Void> backup = runBackupAgents ? runBackup(connFile) : Future<Void>(Never());
|
||||
Future<Void> dr = runBackupAgents ? runDr(connFile) : Future<Void>(Never());
|
||||
|
||||
|
@ -359,7 +360,7 @@ std::map< Optional<Standalone<StringRef>>, std::vector< std::vector< std::string
|
|||
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector<IPAddress> ips, bool sslEnabled,
|
||||
Reference<TLSOptions> tlsOptions, LocalityData localities,
|
||||
ProcessClass processClass, std::string baseFolder, bool restarting,
|
||||
bool useSeedFile, bool runBackupAgents, bool sslOnly) {
|
||||
bool useSeedFile, bool runBackupAgents, bool sslOnly, std::string whitelistBinPaths) {
|
||||
state int bootCount = 0;
|
||||
state std::vector<std::string> myFolders;
|
||||
state std::vector<std::string> coordFolders;
|
||||
|
@ -401,7 +402,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector
|
|||
std::string path = joinPath(myFolders[i], "fdb.cluster");
|
||||
Reference<ClusterConnectionFile> clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path));
|
||||
const int listenPort = i*listenPerProcess + 1;
|
||||
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, runBackupAgents));
|
||||
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, runBackupAgents, whitelistBinPaths));
|
||||
TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]);
|
||||
}
|
||||
|
||||
|
@ -606,7 +607,7 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array<int, 4> parts) {
|
|||
ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
|
||||
Optional<ClusterConnectionString>* pConnString,
|
||||
Standalone<StringRef>* pStartingConfiguration,
|
||||
Reference<TLSOptions> tlsOptions, int extraDB) {
|
||||
Reference<TLSOptions> tlsOptions, int extraDB, std::string whitelistBinPaths) {
|
||||
CSimpleIni ini;
|
||||
ini.SetUnicode();
|
||||
ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str());
|
||||
|
@ -704,7 +705,7 @@ ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, st
|
|||
systemActors->push_back(reportErrors(
|
||||
simulatedMachine(conn, ipAddrs, usingSSL, tlsOptions, localities, processClass, baseFolder, true,
|
||||
i == useSeedForMachine, enableExtraDB,
|
||||
usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass)),
|
||||
usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whitelistBinPaths),
|
||||
processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine"));
|
||||
}
|
||||
|
||||
|
@ -1086,7 +1087,8 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
|
||||
void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
|
||||
Optional<ClusterConnectionString>* pConnString, Standalone<StringRef>* pStartingConfiguration,
|
||||
int extraDB, int minimumReplication, int minimumRegions, Reference<TLSOptions> tlsOptions) {
|
||||
int extraDB, int minimumReplication, int minimumRegions, Reference<TLSOptions> tlsOptions,
|
||||
std::string whitelistBinPaths) {
|
||||
// SOMEDAY: this does not test multi-interface configurations
|
||||
SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions);
|
||||
StatusObject startingConfigJSON = simconfig.db.toJSON(true);
|
||||
|
@ -1282,7 +1284,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID);
|
||||
localities.set(LiteralStringRef("data_hall"), dcUID);
|
||||
systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled, tlsOptions,
|
||||
localities, processClass, baseFolder, false, machine == useSeedForMachine, true, sslOnly), "SimulatedMachine"));
|
||||
localities, processClass, baseFolder, false, machine == useSeedForMachine, true, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
|
||||
|
||||
if (extraDB && g_simulator.extraDB->toString() != conn.toString()) {
|
||||
std::vector<IPAddress> extraIps;
|
||||
|
@ -1296,7 +1298,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
localities.set(LiteralStringRef("data_hall"), dcUID);
|
||||
systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled, tlsOptions,
|
||||
localities,
|
||||
processClass, baseFolder, false, machine == useSeedForMachine, false, sslOnly), "SimulatedMachine"));
|
||||
processClass, baseFolder, false, machine == useSeedForMachine, false, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
|
||||
}
|
||||
|
||||
assignedMachines++;
|
||||
|
@ -1324,7 +1326,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
systemActors->push_back( reportErrors( simulatedMachine(
|
||||
conn, ips, sslEnabled, tlsOptions,
|
||||
localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource),
|
||||
baseFolder, false, i == useSeedForMachine, false, sslEnabled),
|
||||
baseFolder, false, i == useSeedForMachine, false, sslEnabled, whitelistBinPaths ),
|
||||
"SimulatedTesterMachine") );
|
||||
}
|
||||
*pStartingConfiguration = startingConfigString;
|
||||
|
@ -1380,7 +1382,7 @@ void checkExtraDB(const char *testFile, int &extraDB, int &minimumReplication, i
|
|||
ifs.close();
|
||||
}
|
||||
|
||||
ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, Reference<TLSOptions> tlsOptions) {
|
||||
ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whitelistBinPaths, Reference<TLSOptions> tlsOptions) {
|
||||
state vector<Future<Void>> systemActors;
|
||||
state Optional<ClusterConnectionString> connFile;
|
||||
state Standalone<StringRef> startingConfiguration;
|
||||
|
@ -1410,13 +1412,16 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
|
|||
try {
|
||||
//systemActors.push_back( startSystemMonitor(dataFolder) );
|
||||
if (rebooting) {
|
||||
wait(timeoutError(restartSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile,
|
||||
&startingConfiguration, tlsOptions, extraDB),
|
||||
100.0));
|
||||
} else {
|
||||
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB, whitelistBinPaths), 100.0 ) );
|
||||
// FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always
|
||||
if (restoring) {
|
||||
startingConfiguration = LiteralStringRef("usable_regions=1");
|
||||
}
|
||||
}
|
||||
else {
|
||||
g_expect_full_pointermap = 1;
|
||||
setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB,
|
||||
minimumReplication, minimumRegions, tlsOptions);
|
||||
minimumReplication, minimumRegions, tlsOptions, whitelistBinPaths);
|
||||
wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot
|
||||
}
|
||||
std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() );
|
||||
|
|
|
@ -24,6 +24,6 @@
|
|||
#define FDBSERVER_SIMULATEDCLUSTER_H
|
||||
#pragma once
|
||||
|
||||
void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, Reference<TLSOptions> const& useSSL);
|
||||
void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whitelistBinPath, Reference<TLSOptions> const& useSSL);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -218,13 +218,14 @@ struct TLogCommitRequest {
|
|||
|
||||
ReplyPromise<Version> reply;
|
||||
Optional<UID> debugID;
|
||||
bool hasExecOp;
|
||||
|
||||
TLogCommitRequest() {}
|
||||
TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, StringRef messages, Optional<UID> debugID )
|
||||
: arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), minKnownCommittedVersion(minKnownCommittedVersion), messages(messages), debugID(debugID) {}
|
||||
TLogCommitRequest( const Arena& a, Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, StringRef messages, bool hasExecOp, Optional<UID> debugID )
|
||||
: arena(a), prevVersion(prevVersion), version(version), knownCommittedVersion(knownCommittedVersion), minKnownCommittedVersion(minKnownCommittedVersion), messages(messages), debugID(debugID), hasExecOp(hasExecOp){}
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, messages, reply, arena, debugID);
|
||||
serializer(ar, prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, messages, reply, arena, debugID, hasExecOp);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/Notified.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbclient/RunTransaction.actor.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "fdbserver/TLogInterface.h"
|
||||
|
@ -38,6 +39,7 @@
|
|||
#include "fdbserver/LogSystem.h"
|
||||
#include "fdbserver/WaitFailure.h"
|
||||
#include "fdbserver/RecoveryState.h"
|
||||
#include "fdbserver/FDBExecHelper.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
using std::pair;
|
||||
|
@ -301,6 +303,7 @@ struct TLogData : NonCopyable {
|
|||
AsyncVar<bool> largeDiskQueueCommitBytes; //becomes true when diskQueueCommitBytes is greater than MAX_QUEUE_COMMIT_BYTES
|
||||
|
||||
Reference<AsyncVar<ServerDBInfo>> dbInfo;
|
||||
Database cx;
|
||||
|
||||
NotifiedVersion queueCommitEnd;
|
||||
Version queueCommitBegin;
|
||||
|
@ -325,16 +328,28 @@ struct TLogData : NonCopyable {
|
|||
FlowLock concurrentLogRouterReads;
|
||||
FlowLock persistentDataCommitLock;
|
||||
|
||||
bool ignorePopRequest; // ignore pop request from storage servers
|
||||
double ignorePopDeadline; // time until which the ignorePopRequest will be
|
||||
// honored
|
||||
std::string ignorePopUid; // callers that set ignorePopRequest will set this
|
||||
// extra state, used to validate the ownership of
|
||||
// the set and for callers that unset will
|
||||
// be able to match it up
|
||||
std::string dataFolder; // folder where data is stored
|
||||
std::map<Tag, Version> toBePopped; // map of Tag->Version for all the pops
|
||||
// that came when ignorePopRequest was set
|
||||
Reference<AsyncVar<bool>> degraded;
|
||||
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded)
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
|
||||
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
|
||||
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), overheadBytesInput(0), overheadBytesDurable(0),
|
||||
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
|
||||
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS)
|
||||
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
|
||||
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
|
||||
{
|
||||
cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -474,13 +489,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
UID recruitmentID;
|
||||
std::set<Tag> allTags;
|
||||
Future<Void> terminated;
|
||||
FlowLock execOpLock;
|
||||
bool execOpCommitInProgress;
|
||||
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, uint64_t protocolVersion, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion),
|
||||
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
|
||||
// These are initialized differently on init() or recovery
|
||||
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid)
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
|
||||
{
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), UID());
|
||||
|
||||
|
@ -591,6 +608,7 @@ ACTOR Future<Void> tLogLock( TLogData* self, ReplyPromise< TLogLockResult > repl
|
|||
TEST( !logData->stopped );
|
||||
|
||||
TraceEvent("TLogStop", logData->logId).detail("Ver", stopVersion).detail("IsStopped", logData->stopped).detail("QueueCommitted", logData->queueCommittedVersion.get());
|
||||
unregisterTLog(logData->logId);
|
||||
|
||||
logData->stopped = true;
|
||||
if(!logData->recoveryComplete.isSet()) {
|
||||
|
@ -1136,14 +1154,28 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>> & getVersionMessages( Re
|
|||
return tagData->versionMessages;
|
||||
};
|
||||
|
||||
ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogData> logData ) {
|
||||
state Version upTo = req.to;
|
||||
int8_t tagLocality = req.tag.locality;
|
||||
ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
|
||||
if (self->ignorePopRequest && inputTag != txsTag) {
|
||||
TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
|
||||
|
||||
if (self->toBePopped.find(inputTag) == self->toBePopped.end()
|
||||
|| to > self->toBePopped[inputTag]) {
|
||||
self->toBePopped[inputTag] = to;
|
||||
}
|
||||
// add the pop to the toBePopped map
|
||||
TraceEvent(SevDebug, "IgnoringPopRequest")
|
||||
.detail("IgnorePopDeadline", self->ignorePopDeadline)
|
||||
.detail("Tag", inputTag.toString())
|
||||
.detail("Version", to);
|
||||
return Void();
|
||||
}
|
||||
state Version upTo = to;
|
||||
int8_t tagLocality = inputTag.locality;
|
||||
if (logData->logSystem->get().isValid() && logData->logSystem->get()->isPseudoLocality(tagLocality)) {
|
||||
upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, req.to);
|
||||
upTo = logData->logSystem->get()->popPseudoLocalityTag(tagLocality, to);
|
||||
tagLocality = tagLocalityLogRouter;
|
||||
}
|
||||
state Tag tag(tagLocality, req.tag.id);
|
||||
state Tag tag(tagLocality, inputTag.id);
|
||||
auto tagData = logData->getTagData(tag);
|
||||
if (!tagData) {
|
||||
tagData = logData->createTagData(tag, upTo, true, true, false);
|
||||
|
@ -1165,7 +1197,34 @@ ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogDat
|
|||
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskTLogPop));
|
||||
//TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogData> logData ) {
|
||||
// timeout check for ignorePopRequest
|
||||
if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) {
|
||||
|
||||
TraceEvent("EnableTLogPlayAllIgnoredPops");
|
||||
// use toBePopped and issue all the pops
|
||||
state std::map<Tag, Version>::iterator it;
|
||||
state vector<Future<Void>> ignoredPops;
|
||||
self->ignorePopRequest = false;
|
||||
self->ignorePopUid = "";
|
||||
self->ignorePopDeadline = 0.0;
|
||||
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
|
||||
TraceEvent("PlayIgnoredPop")
|
||||
.detail("Tag", it->first.toString())
|
||||
.detail("Version", it->second);
|
||||
ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
|
||||
}
|
||||
self->toBePopped.clear();
|
||||
wait(waitForAll(ignoredPops));
|
||||
TraceEvent("ResetIgnorePopRequest")
|
||||
.detail("Now", g_network->now())
|
||||
.detail("IgnorePopRequest", self->ignorePopRequest)
|
||||
.detail("IgnorePopDeadline", self->ignorePopDeadline);
|
||||
}
|
||||
wait(tLogPopCore(self, req.tag, req.to, logData));
|
||||
req.reply.send(Void());
|
||||
return Void();
|
||||
}
|
||||
|
@ -1585,6 +1644,207 @@ ACTOR Future<Void> commitQueue( TLogData* self ) {
|
|||
}
|
||||
}
|
||||
|
||||
void execProcessingHelper(TLogData* self,
|
||||
Reference<LogData> logData,
|
||||
TLogCommitRequest* req,
|
||||
Standalone<VectorRef<Tag>>* execTags,
|
||||
ExecCmdValueString* execArg,
|
||||
StringRef* execCmd,
|
||||
Version* execVersion,
|
||||
vector<Future<Void>>* snapFailKeySetters,
|
||||
vector<Future<Void>>* ignoredPops)
|
||||
{
|
||||
// inspect the messages to find if there is an Exec type and print
|
||||
// it. message are prefixed by the length of the message and each
|
||||
// field is prefixed by the length too
|
||||
uint8_t type = MutationRef::MAX_ATOMIC_OP;
|
||||
StringRef param2;
|
||||
ArenaReader rd(req->arena, req->messages, Unversioned());
|
||||
int32_t messageLength, rawLength;
|
||||
uint16_t tagCount;
|
||||
uint32_t sub;
|
||||
while (!rd.empty()) {
|
||||
Tag tmpTag;
|
||||
bool hasTxsTag = false;
|
||||
rd.checkpoint();
|
||||
rd >> messageLength >> sub >> tagCount;
|
||||
for (int i = 0; i < tagCount; i++) {
|
||||
rd >> tmpTag;
|
||||
if (tmpTag == txsTag) {
|
||||
hasTxsTag = true;
|
||||
}
|
||||
execTags->push_back(execTags->arena(), tmpTag);
|
||||
}
|
||||
if (!hasTxsTag) {
|
||||
rd >> type;
|
||||
if (type == MutationRef::Exec) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
rawLength = messageLength + sizeof(messageLength);
|
||||
rd.rewind();
|
||||
rd.readBytes(rawLength);
|
||||
}
|
||||
|
||||
int32_t len = 0;
|
||||
if (type == MutationRef::Exec) {
|
||||
// get param1
|
||||
rd >> len;
|
||||
*execCmd = StringRef((uint8_t const*)rd.readBytes(len), len);
|
||||
// get param2
|
||||
rd >> len;
|
||||
param2 = StringRef((uint8_t const*)rd.readBytes(len), len);
|
||||
|
||||
TraceEvent(SevDebug, "TLogExecCommandType", self->dbgid)
|
||||
.detail("Value", execCmd->toString())
|
||||
.detail("Version", req->version);
|
||||
|
||||
execArg->setCmdValueString(param2);
|
||||
execArg->dbgPrint();
|
||||
StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid"));
|
||||
if (!execCmd->startsWith(LiteralStringRef("\xff"))) {
|
||||
*execVersion = req->version;
|
||||
}
|
||||
if (*execCmd == execSnap) {
|
||||
// validation check specific to snap request
|
||||
std::string reason;
|
||||
if (!self->ignorePopRequest) {
|
||||
*execVersion = invalidVersion;
|
||||
reason = "SnapFailIgnorePopNotSet";
|
||||
} else if (uidStr.toString() != self->ignorePopUid) {
|
||||
*execVersion = invalidVersion;
|
||||
reason = "SnapFailedDisableTLogUidMismatch";
|
||||
}
|
||||
|
||||
if (*execVersion == invalidVersion) {
|
||||
TraceEvent(SevWarn, "TLogSnapFailed")
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("IgnorePopRequest", self->ignorePopRequest)
|
||||
.detail("Reason", reason)
|
||||
.detail("Version", req->version);
|
||||
|
||||
TraceEvent("ExecCmdSnapCreate")
|
||||
.detail("Uid", uidStr.toString())
|
||||
.detail("Status", -1)
|
||||
.detail("Tag", logData->allTags.begin()->toString())
|
||||
.detail("Role", "TLog")
|
||||
.detail("Version", req->version);
|
||||
|
||||
if (g_network->isSimulated()) {
|
||||
// write SnapFailedTLog.$UID
|
||||
Standalone<StringRef> keyStr = snapTestFailStatus.withSuffix(uidStr);
|
||||
StringRef valStr = LiteralStringRef("Success");
|
||||
TraceEvent(SevDebug, "TLogKeyStr").detail("Value", keyStr);
|
||||
snapFailKeySetters->push_back(runRYWTransaction(self->cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void>
|
||||
{ tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->set(keyStr, valStr); return Void(); }));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (*execCmd == execDisableTLogPop) {
|
||||
self->ignorePopRequest = true;
|
||||
if (self->ignorePopUid != "") {
|
||||
TraceEvent(SevWarn, "TLogPopDisableonDisable")
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
self->ignorePopUid = uidStr.toString();
|
||||
self->ignorePopDeadline = g_network->now() + SERVER_KNOBS->TLOG_IGNORE_POP_AUTO_ENABLE_DELAY;
|
||||
TraceEvent("TLogExecCmdPopDisable")
|
||||
.detail("ExecCmd", execCmd->toString())
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("IgnporePopRequest", self->ignorePopRequest)
|
||||
.detail("IgnporePopDeadline", self->ignorePopDeadline)
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
if (*execCmd == execEnableTLogPop) {
|
||||
if (self->ignorePopUid != uidStr.toString()) {
|
||||
TraceEvent(SevWarn, "TLogPopDisableEnableUidMismatch")
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
|
||||
TraceEvent("EnableTLogPlayAllIgnoredPops2");
|
||||
// use toBePopped and issue all the pops
|
||||
std::map<Tag, Version>::iterator it;
|
||||
self->ignorePopRequest = false;
|
||||
self->ignorePopDeadline = 0.0;
|
||||
self->ignorePopUid = "";
|
||||
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
|
||||
TraceEvent("PlayIgnoredPop")
|
||||
.detail("Tag", it->first.toString())
|
||||
.detail("Version", it->second);
|
||||
ignoredPops->push_back(tLogPopCore(self, it->first, it->second, logData));
|
||||
}
|
||||
self->toBePopped.clear();
|
||||
TraceEvent("TLogExecCmdPopEnable")
|
||||
.detail("ExecCmd", execCmd->toString())
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("IgnorePopUid", self->ignorePopUid)
|
||||
.detail("IgnporePopRequest", self->ignorePopRequest)
|
||||
.detail("IgnporePopDeadline", self->ignorePopDeadline)
|
||||
.detail("Version", req->version);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> tLogSnapHelper(TLogData* self,
|
||||
Reference<LogData> logData,
|
||||
ExecCmdValueString* execArg,
|
||||
Version version,
|
||||
Version execVersion,
|
||||
StringRef execCmd,
|
||||
Standalone<VectorRef<Tag>> execTags)
|
||||
{
|
||||
state int err = 0;
|
||||
state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid"));
|
||||
state UID execUID = UID::fromString(uidStr.toString());
|
||||
state bool otherRoleExeced = false;
|
||||
// TLog is special, we need to snap at the execVersion.
|
||||
// storage on the same node should not initiate a snap before TLog which will make
|
||||
// the snap version at TLog unpredictable
|
||||
ASSERT(!isExecOpInProgress(execUID));
|
||||
if (!otherRoleExeced) {
|
||||
setExecOpInProgress(execUID);
|
||||
int tmpErr = wait(execHelper(execArg, self->dataFolder, "role=tlog"));
|
||||
err = tmpErr;
|
||||
clearExecOpInProgress(execUID);
|
||||
}
|
||||
TraceEvent("TLogCommitExecTraceTLog")
|
||||
.detail("UidStr", uidStr.toString())
|
||||
.detail("Status", err)
|
||||
.detail("Tag", logData->allTags.begin()->toString())
|
||||
.detail("OldTagSize", logData->allTags.size())
|
||||
.detail("Role", "TLog");
|
||||
|
||||
// print the detailed status message
|
||||
for (int i = 0; i < execTags.size(); i++) {
|
||||
Version poppedTagVersion = -1;
|
||||
auto tagv = logData->getTagData(execTags[i]);
|
||||
if (!tagv) {
|
||||
continue;
|
||||
}
|
||||
poppedTagVersion = tagv->popped;
|
||||
|
||||
TraceEvent te = TraceEvent(SevDebug, "TLogExecTraceDetailed");
|
||||
te.detail("Uid", uidStr.toString());
|
||||
te.detail("Status", err);
|
||||
te.detail("Role", "TLog");
|
||||
te.detail("ExecCmd", execCmd.toString());
|
||||
te.detail("Param2", execArg->getCmdValueString().toString());
|
||||
te.detail("Tag", tagv->tag.toString());
|
||||
te.detail("Version", version);
|
||||
te.detail("PoppedTagVersion", poppedTagVersion);
|
||||
te.detail("PersistentDataVersion", logData->persistentDataVersion);
|
||||
te.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion);
|
||||
te.detail("QueueCommittedVersion", logData->queueCommittedVersion.get());
|
||||
te.detail("IgnorePopUid", self->ignorePopUid);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> tLogCommit(
|
||||
TLogData* self,
|
||||
TLogCommitRequest req,
|
||||
|
@ -1619,22 +1879,60 @@ ACTOR Future<Void> tLogCommit(
|
|||
wait( delayJittered(.005, TaskTLogCommit) );
|
||||
}
|
||||
|
||||
// while exec op is being committed, no new transactions will be admitted.
|
||||
// This property is useful for snapshot kind of operations which wants to
|
||||
// take a snap of the disk image at a particular version (not data from
|
||||
// future version to be included)
|
||||
// NOTE: execOpCommitInProgress will not be set for exec commands which
|
||||
// start with \xff
|
||||
state bool execOpLockTaken = false;
|
||||
if (logData->execOpCommitInProgress) {
|
||||
wait(logData->execOpLock.take());
|
||||
execOpLockTaken = true;
|
||||
}
|
||||
|
||||
if(logData->stopped) {
|
||||
req.reply.sendError( tlog_stopped() );
|
||||
return Void();
|
||||
}
|
||||
|
||||
if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!)
|
||||
state Version execVersion = invalidVersion;
|
||||
state ExecCmdValueString execArg();
|
||||
state TLogQueueEntryRef qe;
|
||||
state StringRef execCmd;
|
||||
state Standalone<VectorRef<Tag>> execTags;
|
||||
state vector<Future<Void>> playIgnoredPops;
|
||||
state vector<Future<Void>> snapFailKeySetters;
|
||||
|
||||
if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!)
|
||||
if(req.debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before");
|
||||
|
||||
|
||||
if (req.hasExecOp) {
|
||||
execProcessingHelper(self, logData, &req, &execTags, &execArg, &execCmd, &execVersion, &snapFailKeySetters, &playIgnoredPops);
|
||||
if (execVersion != invalidVersion) {
|
||||
TraceEvent(SevDebug, "SettingExecOpCommit")
|
||||
.detail("LogId", logData->logId)
|
||||
.detail("ExecVersion", execVersion)
|
||||
.detail("Version", req.version);
|
||||
logData->execOpCommitInProgress = true;
|
||||
if (!execOpLockTaken) {
|
||||
wait(logData->execOpLock.take());
|
||||
execOpLockTaken = true;
|
||||
} else {
|
||||
ASSERT(logData->execOpLock.available() == 0);
|
||||
}
|
||||
ASSERT(execOpLockTaken);
|
||||
}
|
||||
}
|
||||
|
||||
//TraceEvent("TLogCommit", logData->logId).detail("Version", req.version);
|
||||
commitMessages(self, logData, req.version, req.arena, req.messages);
|
||||
|
||||
logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, req.knownCommittedVersion);
|
||||
|
||||
// Log the changes to the persistent queue, to be committed by commitQueue()
|
||||
TLogQueueEntryRef qe;
|
||||
qe.version = req.version;
|
||||
qe.knownCommittedVersion = logData->knownCommittedVersion;
|
||||
qe.messages = req.messages;
|
||||
|
@ -1648,6 +1946,7 @@ ACTOR Future<Void> tLogCommit(
|
|||
|
||||
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
|
||||
logData->version.set( req.version );
|
||||
wait(waitForAll(playIgnoredPops));
|
||||
|
||||
if(req.debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.AfterTLogCommit");
|
||||
|
@ -1656,6 +1955,20 @@ ACTOR Future<Void> tLogCommit(
|
|||
state Future<Void> stopped = logData->stopCommit.onTrigger();
|
||||
wait( timeoutWarning( logData->queueCommittedVersion.whenAtLeast( req.version ) || stopped, 0.1, warningCollectorInput ) );
|
||||
|
||||
if ((execVersion != invalidVersion) &&
|
||||
execVersion <= logData->queueCommittedVersion.get()) {
|
||||
wait(tLogSnapHelper(self, logData, &execArg, qe.version, execVersion, execCmd, execTags));
|
||||
}
|
||||
if (execVersion != invalidVersion && logData->execOpCommitInProgress) {
|
||||
ASSERT(execOpLockTaken);
|
||||
logData->execOpCommitInProgress = false;
|
||||
}
|
||||
if (execOpLockTaken) {
|
||||
logData->execOpLock.release();
|
||||
execOpLockTaken = false;
|
||||
}
|
||||
execVersion = invalidVersion;
|
||||
|
||||
if(stopped.isReady()) {
|
||||
ASSERT(logData->stopped);
|
||||
req.reply.sendError( tlog_stopped() );
|
||||
|
@ -1666,6 +1979,13 @@ ACTOR Future<Void> tLogCommit(
|
|||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
|
||||
|
||||
req.reply.send( logData->durableKnownCommittedVersion );
|
||||
if (g_network->isSimulated()) {
|
||||
if (snapFailKeySetters.size() > 0) {
|
||||
TraceEvent(SevDebug, "SettingSnapFailKey");
|
||||
wait(waitForAll(snapFailKeySetters));
|
||||
TraceEvent(SevDebug, "SettingSnapFailKeyDone");
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -1841,7 +2161,7 @@ ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Refere
|
|||
logData->addActor.send( tLogPeekMessages( self, req, logData ) );
|
||||
}
|
||||
when( TLogPopRequest req = waitNext( tli.popMessages.getFuture() ) ) {
|
||||
logData->addActor.send( tLogPop( self, req, logData ) );
|
||||
logData->addActor.send(tLogPop(self, req, logData));
|
||||
}
|
||||
when( TLogCommitRequest req = waitNext( tli.commit.getFuture() ) ) {
|
||||
//TraceEvent("TLogCommitReq", logData->logId).detail("Ver", req.version).detail("PrevVer", req.prevVersion).detail("LogVer", logData->version.get());
|
||||
|
@ -1875,6 +2195,7 @@ ACTOR Future<Void> serveTLogInterface( TLogData* self, TLogInterface tli, Refere
|
|||
void removeLog( TLogData* self, Reference<LogData> logData ) {
|
||||
TraceEvent("TLogRemoved", self->dbgid).detail("LogId", logData->logId).detail("Input", logData->bytesInput.getValue()).detail("Durable", logData->bytesDurable.getValue());
|
||||
logData->stopped = true;
|
||||
unregisterTLog(logData->logId);
|
||||
if(!logData->recoveryComplete.isSet()) {
|
||||
logData->recoveryComplete.sendError(end_of_stream());
|
||||
}
|
||||
|
@ -2388,6 +2709,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
self->spillOrder.push_back(recruited.id());
|
||||
|
||||
TraceEvent("TLogStart", logData->logId);
|
||||
registerTLog(logData->logId);
|
||||
|
||||
state Future<Void> updater;
|
||||
state bool pulledRecoveryVersions = false;
|
||||
try {
|
||||
|
@ -2493,8 +2816,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
}
|
||||
|
||||
// New tLog (if !recoverFrom.size()) or restore from network
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, Reference<AsyncVar<bool>> degraded ) {
|
||||
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded );
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded ) {
|
||||
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder );
|
||||
state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
|
||||
|
||||
TraceEvent("SharedTlog", tlogId);
|
||||
|
|
|
@ -431,7 +431,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
vector<Future<Void>> tLogCommitResults;
|
||||
for(int loc=0; loc< it->logServers.size(); loc++) {
|
||||
Standalone<StringRef> msg = data.getMessages(location);
|
||||
allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( msg.arena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, msg, debugID ), TaskTLogCommitReply ) );
|
||||
allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( msg.arena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, msg, data.getHasExecOp(), debugID ), TaskTLogCommitReply ) );
|
||||
Future<Void> commitSuccess = success(allReplies.back());
|
||||
addActor.get().send(commitSuccess);
|
||||
tLogCommitResults.push_back(commitSuccess);
|
||||
|
@ -1108,11 +1108,11 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
return std::numeric_limits<Version>::max();
|
||||
}
|
||||
|
||||
virtual void getPushLocations( std::vector<Tag> const& tags, std::vector<int>& locations ) {
|
||||
virtual void getPushLocations(std::vector<Tag> const& tags, std::vector<int>& locations, bool allLocations) {
|
||||
int locationOffset = 0;
|
||||
for(auto& log : tLogs) {
|
||||
if(log->isLocal && log->logServers.size()) {
|
||||
log->getPushLocations(tags, locations, locationOffset);
|
||||
log->getPushLocations(tags, locations, locationOffset, allLocations);
|
||||
locationOffset += log->logServers.size();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,6 +60,7 @@ struct WorkerInterface {
|
|||
RequestStream< struct EventLogRequest > eventLogRequest;
|
||||
RequestStream< struct TraceBatchDumpRequest > traceBatchDumpRequest;
|
||||
RequestStream< struct DiskStoreRequest > diskStoreRequest;
|
||||
RequestStream<struct ExecuteRequest> execReq;
|
||||
|
||||
TesterInterface testerInterface;
|
||||
|
||||
|
@ -71,7 +72,7 @@ struct WorkerInterface {
|
|||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, clientInterface, locality, tLog, master, masterProxy, dataDistributor, ratekeeper, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest);
|
||||
serializer(ar, clientInterface, locality, tLog, master, masterProxy, dataDistributor, ratekeeper, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest, execReq);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -239,6 +240,23 @@ struct TraceBatchDumpRequest {
|
|||
}
|
||||
};
|
||||
|
||||
struct ExecuteRequest {
|
||||
constexpr static FileIdentifier file_identifier = 8184128;
|
||||
ReplyPromise<Void> reply;
|
||||
|
||||
Arena arena;
|
||||
StringRef execPayload;
|
||||
|
||||
ExecuteRequest(StringRef execPayload) : execPayload(execPayload) {}
|
||||
|
||||
ExecuteRequest() : execPayload() {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, reply, execPayload, arena);
|
||||
}
|
||||
};
|
||||
|
||||
struct LoadedReply {
|
||||
constexpr static FileIdentifier file_identifier = 9956350;
|
||||
Standalone<StringRef> payload;
|
||||
|
@ -380,7 +398,9 @@ ACTOR Future<Void> extractClusterInterface(Reference<AsyncVar<Optional<struct Cl
|
|||
|
||||
ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> ccf, LocalityData localities, ProcessClass processClass,
|
||||
std::string dataFolder, std::string coordFolder, int64_t memoryLimit,
|
||||
std::string metricsConnFile, std::string metricsPrefix, int64_t memoryProfilingThreshold);
|
||||
std::string metricsConnFile, std::string metricsPrefix, int64_t memoryProfilingThreshold,
|
||||
std::string whitelistBinPaths);
|
||||
|
||||
ACTOR Future<Void> clusterController(Reference<ClusterConnectionFile> ccf,
|
||||
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
|
||||
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
|
||||
|
@ -399,11 +419,11 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData, StorageServerIn
|
|||
ACTOR Future<Void> masterServer(MasterInterface mi, Reference<AsyncVar<ServerDBInfo>> db,
|
||||
ServerCoordinators serverCoordinators, LifetimeToken lifetime, bool forceRecovery);
|
||||
ACTOR Future<Void> masterProxyServer(MasterProxyInterface proxy, InitializeMasterProxyRequest req,
|
||||
Reference<AsyncVar<ServerDBInfo>> db);
|
||||
Reference<AsyncVar<ServerDBInfo>> db, std::string whitelistBinPaths);
|
||||
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
|
||||
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk,
|
||||
Promise<Void> oldLog, Promise<Void> recovered, Reference<AsyncVar<bool>> degraded); // changes tli->id() to be the recovered ID
|
||||
Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded); // changes tli->id() to be the recovered ID
|
||||
ACTOR Future<Void> monitorServerDBInfo(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
|
||||
Reference<ClusterConnectionFile> ccf, LocalityData locality,
|
||||
Reference<AsyncVar<ServerDBInfo>> dbInfo);
|
||||
|
@ -425,7 +445,7 @@ namespace oldTLog_6_0 {
|
|||
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
|
||||
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk,
|
||||
Promise<Void> oldLog, Promise<Void> recovered, Reference<AsyncVar<bool>> degraded);
|
||||
Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded);
|
||||
}
|
||||
|
||||
typedef decltype(&tLog) TLogFn;
|
||||
|
|
|
@ -61,6 +61,8 @@
|
|||
#include "versions.h"
|
||||
#endif
|
||||
|
||||
#include "fdbmonitor/SimpleIni.h"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <execinfo.h>
|
||||
#include <signal.h>
|
||||
|
@ -79,8 +81,8 @@
|
|||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
enum {
|
||||
OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_NEWCONSOLE, OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_MACHINEID, OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, OPT_METRICSPREFIX,
|
||||
OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_USE_OBJECT_SERIALIZER };
|
||||
OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_NEWCONSOLE, OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RESTORING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_MACHINEID, OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, OPT_METRICSPREFIX,
|
||||
OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_USE_OBJECT_SERIALIZER, OPT_WHITELIST_BINPATH };
|
||||
|
||||
CSimpleOpt::SOption g_rgOptions[] = {
|
||||
{ OPT_CONNFILE, "-C", SO_REQ_SEP },
|
||||
|
@ -158,6 +160,7 @@ CSimpleOpt::SOption g_rgOptions[] = {
|
|||
{ OPT_TRACE_FORMAT , "--trace_format", SO_REQ_SEP },
|
||||
{ OPT_USE_OBJECT_SERIALIZER, "-S", SO_REQ_SEP },
|
||||
{ OPT_USE_OBJECT_SERIALIZER, "--object-serializer", SO_REQ_SEP },
|
||||
{ OPT_WHITELIST_BINPATH, "--whitelist_binpath", SO_REQ_SEP },
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
TLS_OPTION_FLAGS
|
||||
|
@ -913,6 +916,7 @@ int main(int argc, char* argv[]) {
|
|||
const char *testFile = "tests/default.txt";
|
||||
std::string kvFile;
|
||||
std::string testServersStr;
|
||||
std::string whitelistBinPaths;
|
||||
std::vector<std::string> publicAddressStrs, listenAddressStrs;
|
||||
const char *targetKey = NULL;
|
||||
uint64_t memLimit = 8LL << 30; // Nice to maintain the same default value for memLimit and SERVER_KNOBS->SERVER_MEM_LIMIT and SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT
|
||||
|
@ -1193,7 +1197,7 @@ int main(int argc, char* argv[]) {
|
|||
case OPT_RESTARTING:
|
||||
restarting = true;
|
||||
break;
|
||||
case OPT_RANDOMSEED: {
|
||||
case OPT_RANDOMSEED: {
|
||||
char* end;
|
||||
randomSeed = (uint32_t)strtoul( args.OptionArg(), &end, 0 );
|
||||
if( *end ) {
|
||||
|
@ -1299,6 +1303,9 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case OPT_WHITELIST_BINPATH:
|
||||
whitelistBinPaths = args.OptionArg();
|
||||
break;
|
||||
#ifndef TLS_DISABLED
|
||||
case TLSOptions::OPT_TLS_PLUGIN:
|
||||
args.OptionArg();
|
||||
|
@ -1641,7 +1648,8 @@ int main(int argc, char* argv[]) {
|
|||
|
||||
std::vector<std::string> directories = platform::listDirectories( dataFolder );
|
||||
for(int i = 0; i < directories.size(); i++)
|
||||
if( directories[i].size() != 32 && directories[i] != "." && directories[i] != ".." && directories[i] != "backups") {
|
||||
if (directories[i].size() != 32 && directories[i] != "." && directories[i] != ".." &&
|
||||
directories[i] != "backups" && directories[i].find("snap") == std::string::npos) {
|
||||
TraceEvent(SevError, "IncompatibleDirectoryFound").detail("DataFolder", dataFolder).detail("SuspiciousFile", directories[i]);
|
||||
fprintf(stderr, "ERROR: Data folder `%s' had non fdb file `%s'; please use clean, fdb-only folder\n", dataFolder.c_str(), directories[i].c_str());
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
|
@ -1658,12 +1666,85 @@ int main(int argc, char* argv[]) {
|
|||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
|
||||
int isRestoring = 0;
|
||||
if (!restarting) {
|
||||
platform::eraseDirectoryRecursive( dataFolder );
|
||||
platform::createDirectory( dataFolder );
|
||||
}
|
||||
} else {
|
||||
CSimpleIni ini;
|
||||
ini.SetUnicode();
|
||||
std::string absDataFolder = abspath(dataFolder);
|
||||
ini.LoadFile(joinPath(absDataFolder, "restartInfo.ini").c_str());
|
||||
int backupFailed = true;
|
||||
const char* isRestoringStr = ini.GetValue("RESTORE", "isRestoring", NULL);
|
||||
if (isRestoringStr) {
|
||||
isRestoring = atoi(isRestoringStr);
|
||||
const char* backupFailedStr = ini.GetValue("RESTORE", "BackupFailed", NULL);
|
||||
if (isRestoring && backupFailedStr) {
|
||||
backupFailed = atoi(backupFailedStr);
|
||||
}
|
||||
}
|
||||
if (isRestoring && !backupFailed) {
|
||||
std::vector<std::string> returnList;
|
||||
std::string ext = "";
|
||||
returnList = platform::listDirectories(absDataFolder);
|
||||
std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID");
|
||||
|
||||
setupAndRun( dataFolder, testFile, restarting, tlsOptions );
|
||||
TraceEvent("RestoringDataFolder").detail("DataFolder", absDataFolder);
|
||||
TraceEvent("RestoreSnapUID").detail("UID", snapStr);
|
||||
|
||||
// delete all files (except fdb.cluster) in non-snap directories
|
||||
for (int i = 0; i < returnList.size(); i++) {
|
||||
if (returnList[i] == "." || returnList[i] == "..") {
|
||||
continue;
|
||||
}
|
||||
if (returnList[i].find(snapStr) != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string childf = absDataFolder + "/" + returnList[i];
|
||||
std::vector<std::string> returnFiles = platform::listFiles(childf, ext);
|
||||
for (int j = 0; j < returnFiles.size(); j++) {
|
||||
if (returnFiles[j] != "fdb.cluster" && returnFiles[j] != "fitness") {
|
||||
TraceEvent("DeletingNonSnapfiles")
|
||||
.detail("FileBeingDeleted", childf + "/" + returnFiles[j]);
|
||||
deleteFile(childf + "/" + returnFiles[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// move the contents from snap folder to the original folder,
|
||||
// delete snap folders
|
||||
for (int i = 0; i < returnList.size(); i++) {
|
||||
if (returnList[i] == "." || returnList[i] == "..") {
|
||||
continue;
|
||||
}
|
||||
std::string dirSrc = absDataFolder + "/" + returnList[i];
|
||||
// delete snap directories which are not part of restoreSnapUID
|
||||
if (returnList[i].find(snapStr) == std::string::npos) {
|
||||
if (returnList[i].find("snap") != std::string::npos) {
|
||||
platform::eraseDirectoryRecursive(dirSrc);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// remove empty/partial snap directories
|
||||
std::vector<std::string> childrenList = platform::listFiles(dirSrc);
|
||||
if (childrenList.size() == 0) {
|
||||
TraceEvent("RemovingEmptySnapDirectory").detail("DirBeingDeleted", dirSrc);
|
||||
platform::eraseDirectoryRecursive(dirSrc);
|
||||
continue;
|
||||
}
|
||||
std::string origDir = returnList[i].substr(0, 32);
|
||||
std::string dirToRemove = absDataFolder + "/" + origDir;
|
||||
TraceEvent("DeletingOriginalNonSnapDirectory").detail("FileBeingDeleted", dirToRemove);
|
||||
platform::eraseDirectoryRecursive(dirToRemove);
|
||||
renameFile(dirSrc, dirToRemove);
|
||||
TraceEvent("RenamingSnapToOriginalDirectory")
|
||||
.detail("Oldname", dirSrc)
|
||||
.detail("Newname", dirToRemove);
|
||||
}
|
||||
}
|
||||
}
|
||||
setupAndRun( dataFolder, testFile, restarting, (isRestoring >= 1), whitelistBinPaths, tlsOptions);
|
||||
g_simulator.run();
|
||||
} else if (role == FDBD) {
|
||||
ASSERT( connectionFile );
|
||||
|
@ -1674,7 +1755,7 @@ int main(int argc, char* argv[]) {
|
|||
dataFolder = format("fdb/%d/", publicAddresses.address.port); // SOMEDAY: Better default
|
||||
|
||||
vector<Future<Void>> actors(listenErrors.begin(), listenErrors.end());
|
||||
actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, dataFolder, storageMemLimit, metricsConnFile, metricsPrefix, rsssize) );
|
||||
actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, dataFolder, storageMemLimit, metricsConnFile, metricsPrefix, rsssize, whitelistBinPaths) );
|
||||
//actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement
|
||||
|
||||
f = stopAfter( waitForAll(actors) );
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
<ActorCompiler Include="KeyValueStoreCompressTestData.actor.cpp" />
|
||||
<ActorCompiler Include="IndirectShadowPager.actor.cpp" />
|
||||
<ClCompile Include="Knobs.cpp" />
|
||||
<ActorCompiler Include="FDBExecHelper.actor.cpp" />
|
||||
<ActorCompiler Include="QuietDatabase.actor.cpp" />
|
||||
<ActorCompiler Include="networktest.actor.cpp" />
|
||||
<ActorCompiler Include="workloads\Unreadable.actor.cpp" />
|
||||
|
@ -152,6 +153,7 @@
|
|||
<ActorCompiler Include="workloads\VersionStamp.actor.cpp" />
|
||||
<ActorCompiler Include="workloads\Serializability.actor.cpp" />
|
||||
<ActorCompiler Include="workloads\DiskDurability.actor.cpp" />
|
||||
<ActorCompiler Include="workloads\SnapTest.actor.cpp" />
|
||||
<ActorCompiler Include="workloads\Mako.actor.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
@ -166,6 +168,9 @@
|
|||
</ActorCompiler>
|
||||
<ClInclude Include="DataDistributorInterface.h" />
|
||||
<ClInclude Include="DBCoreState.h" />
|
||||
<ActorCompiler Include="FDBExecHelper.actor.h">
|
||||
<EnableCompile>false</EnableCompile>
|
||||
</ActorCompiler>
|
||||
<ClInclude Include="IDiskQueue.h" />
|
||||
<ClInclude Include="IKeyValueStore.h" />
|
||||
<ClInclude Include="IndirectShadowPager.h" />
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
#include "fdbserver/RecoveryState.h"
|
||||
#include "fdbserver/LogProtocolMessage.h"
|
||||
#include "fdbserver/LatencyBandConfig.h"
|
||||
#include "fdbserver/FDBExecHelper.actor.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
|
@ -1834,14 +1835,17 @@ void addMutation( Reference<T>& target, Version version, MutationRef const& muta
|
|||
}
|
||||
|
||||
template <class T>
|
||||
void splitMutations( KeyRangeMap<T>& map, VerUpdateRef const& update ) {
|
||||
for(auto& m : update.mutations) {
|
||||
splitMutation(map, m, update.version);
|
||||
void splitMutations(StorageServer* data, KeyRangeMap<T>& map, VerUpdateRef const& update, vector<int>& execIndex) {
|
||||
for(int i = 0; i < update.mutations.size(); i++) {
|
||||
splitMutation(data, map, update.mutations[i], update.version);
|
||||
if (update.mutations[i].type == MutationRef::Exec) {
|
||||
execIndex.push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void splitMutation( KeyRangeMap<T>& map, MutationRef const& m, Version ver ) {
|
||||
void splitMutation(StorageServer* data, KeyRangeMap<T>& map, MutationRef const& m, Version ver) {
|
||||
if(isSingleKeyMutation((MutationRef::Type) m.type)) {
|
||||
if ( !SHORT_CIRCUT_ACTUAL_STORAGE || !normalKeys.contains(m.param1) )
|
||||
addMutation( map.rangeContaining(m.param1)->value(), ver, m );
|
||||
|
@ -1855,11 +1859,53 @@ void splitMutation( KeyRangeMap<T>& map, MutationRef const& m, Version ver ) {
|
|||
addMutation( i->value(), ver, MutationRef((MutationRef::Type)m.type, k.begin, k.end) );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
} else if (m.type == MutationRef::Exec) {
|
||||
} else
|
||||
ASSERT(false); // Unknown mutation type in splitMutations
|
||||
}
|
||||
|
||||
ACTOR Future<Void>
|
||||
snapHelper(StorageServer* data, MutationRef m, Version ver)
|
||||
{
|
||||
state std::string cmd = m.param1.toString();
|
||||
if ((cmd == execDisableTLogPop) || (cmd == execEnableTLogPop)) {
|
||||
TraceEvent("IgnoreNonSnapCommands").detail("ExecCommand", cmd);
|
||||
return Void();
|
||||
}
|
||||
|
||||
state ExecCmdValueString execArg(m.param2);
|
||||
state StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid"));
|
||||
state int err = 0;
|
||||
state Future<int> cmdErr;
|
||||
state UID execUID = UID::fromString(uidStr.toString());
|
||||
state bool skip = false;
|
||||
if (cmd == execSnap && isTLogInSameNode()) {
|
||||
skip = true;
|
||||
}
|
||||
// other storage has initiated the exec, so we can skip
|
||||
if (!skip && isExecOpInProgress(execUID)) {
|
||||
skip = true;
|
||||
}
|
||||
|
||||
if (!skip) {
|
||||
setExecOpInProgress(execUID);
|
||||
int err = wait(execHelper(&execArg, data->folder, "role=storage"));
|
||||
clearExecOpInProgress(execUID);
|
||||
}
|
||||
TraceEvent te = TraceEvent("ExecTraceStorage");
|
||||
te.detail("Uid", uidStr.toString());
|
||||
te.detail("Status", err);
|
||||
te.detail("Role", "storage");
|
||||
te.detail("Version", ver);
|
||||
te.detail("Mutation", m.toString());
|
||||
te.detail("Mid", data->thisServerID.toString());
|
||||
te.detail("DurableVersion", data->durableVersion.get());
|
||||
te.detail("DataVersion", data->version.get());
|
||||
te.detail("Tag", data->tag.toString());
|
||||
te.detail("SnapCreateSkipped", skip);
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
|
||||
state TraceInterval interval("FetchKeys");
|
||||
state KeyRange keys = shard->keys;
|
||||
|
@ -1967,21 +2013,29 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
|
|||
if (this_block.more) {
|
||||
Key nfk = this_block.readThrough.present() ? this_block.readThrough.get() : keyAfter( this_block.end()[-1].key );
|
||||
if (nfk != keys.end) {
|
||||
std::deque< Standalone<VerUpdateRef> > updatesToSplit = std::move( shard->updates );
|
||||
state std::deque< Standalone<VerUpdateRef> > updatesToSplit = std::move( shard->updates );
|
||||
|
||||
// This actor finishes committing the keys [keys.begin,nfk) that we already fetched.
|
||||
// The remaining unfetched keys [nfk,keys.end) will become a separate AddingShard with its own fetchKeys.
|
||||
shard->server->addShard( ShardInfo::addingSplitLeft( KeyRangeRef(keys.begin, nfk), shard ) );
|
||||
shard->server->addShard( ShardInfo::newAdding( data, KeyRangeRef(nfk, keys.end) ) );
|
||||
shard = data->shards.rangeContaining( keys.begin ).value()->adding;
|
||||
auto otherShard = data->shards.rangeContaining( nfk ).value()->adding;
|
||||
state AddingShard* otherShard = data->shards.rangeContaining( nfk ).value()->adding;
|
||||
keys = shard->keys;
|
||||
|
||||
// Split our prior updates. The ones that apply to our new, restricted key range will go back into shard->updates,
|
||||
// and the ones delivered to the new shard will be discarded because it is in WaitPrevious phase (hasn't chosen a fetchVersion yet).
|
||||
// What we are doing here is expensive and could get more expensive if we started having many more blocks per shard. May need optimization in the future.
|
||||
for(auto u = updatesToSplit.begin(); u != updatesToSplit.end(); ++u)
|
||||
splitMutations( data->shards, *u );
|
||||
state vector<int> execIdxVec;
|
||||
state std::deque< Standalone<VerUpdateRef> >::iterator u = updatesToSplit.begin();
|
||||
for(; u != updatesToSplit.end(); ++u) {
|
||||
ASSERT(execIdxVec.size() == 0);
|
||||
splitMutations(data, data->shards, *u, execIdxVec);
|
||||
for (auto execIdx : execIdxVec) {
|
||||
wait(snapHelper(data, u->mutations[execIdx], u->version));
|
||||
}
|
||||
execIdxVec.clear();
|
||||
}
|
||||
|
||||
TEST( true );
|
||||
TEST( shard->updates.size() );
|
||||
|
@ -2173,7 +2227,8 @@ void ShardInfo::addMutation(Version version, MutationRef const& mutation) {
|
|||
adding->addMutation(version, mutation);
|
||||
else if (readWrite)
|
||||
readWrite->addMutation(version, mutation, this->keys, readWrite->updateEagerReads);
|
||||
else if (mutation.type != MutationRef::ClearRange) {
|
||||
else if ((mutation.type != MutationRef::ClearRange)
|
||||
&& (mutation.type != MutationRef::Exec)) {
|
||||
TraceEvent(SevError, "DeliveredToNotAssigned").detail("Version", version).detail("Mutation", mutation.toString());
|
||||
ASSERT(false); // Mutation delivered to notAssigned shard!
|
||||
}
|
||||
|
@ -2382,7 +2437,7 @@ public:
|
|||
// debugMutation("SSUpdateMutation", changes[c].version, *m);
|
||||
//}
|
||||
|
||||
splitMutation( data->shards, m, ver );
|
||||
splitMutation(data, data->shards, m, ver);
|
||||
}
|
||||
|
||||
if (data->otherError.getFuture().isReady()) data->otherError.getFuture().get();
|
||||
|
@ -2588,6 +2643,9 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
|||
state VerUpdateRef* pUpdate = &fii.changes[changeNum];
|
||||
for(; mutationNum < pUpdate->mutations.size(); mutationNum++) {
|
||||
updater.applyMutation(data, pUpdate->mutations[mutationNum], pUpdate->version);
|
||||
if (pUpdate->mutations[mutationNum].type == MutationRef::Exec) {
|
||||
wait(snapHelper(data, pUpdate->mutations[mutationNum], pUpdate->version));
|
||||
}
|
||||
mutationBytes += pUpdate->mutations[mutationNum].totalSize();
|
||||
injectedChanges = true;
|
||||
if(mutationBytes > SERVER_KNOBS->DESIRED_UPDATE_BYTES) {
|
||||
|
@ -2660,6 +2718,9 @@ ACTOR Future<Void> update( StorageServer* data, bool* pReceivedUpdate )
|
|||
++data->counters.atomicMutations;
|
||||
break;
|
||||
}
|
||||
if (msg.type == MutationRef::Exec) {
|
||||
wait(snapHelper(data, msg, ver));
|
||||
}
|
||||
}
|
||||
else
|
||||
TraceEvent(SevError, "DiscardingPeekedData", data->thisServerID).detail("Mutation", msg.toString()).detail("Version", cloneCursor2->version().toString());
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "fdbserver/ClusterRecruitmentInterface.h"
|
||||
#include "fdbserver/DataDistributorInterface.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
#include "fdbserver/FDBExecHelper.actor.h"
|
||||
#include "fdbserver/CoordinationInterface.h"
|
||||
#include "fdbclient/FailureMonitorClient.h"
|
||||
#include "fdbclient/MonitorLeader.h"
|
||||
|
@ -66,6 +67,7 @@ extern IKeyValueStore* keyValueStoreCompressTestData(IKeyValueStore* store);
|
|||
# define KV_STORE(filename,uid) keyValueStoreMemory(filename,uid)
|
||||
#endif
|
||||
|
||||
|
||||
ACTOR static Future<Void> extractClientInfo( Reference<AsyncVar<ServerDBInfo>> db, Reference<AsyncVar<ClientDBInfo>> info ) {
|
||||
loop {
|
||||
info->set( db->get().client );
|
||||
|
@ -229,6 +231,7 @@ std::string filenameFromId( KeyValueStoreType storeType, std::string folder, std
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
struct TLogOptions {
|
||||
TLogOptions() = default;
|
||||
TLogOptions( TLogVersion v, TLogSpillType s ) : version(v), spillType(s) {}
|
||||
|
@ -696,7 +699,8 @@ ACTOR Future<Void> workerServer(
|
|||
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
|
||||
ProcessClass initialClass, std::string folder, int64_t memoryLimit,
|
||||
std::string metricsConnFile, std::string metricsPrefix,
|
||||
Promise<Void> recoveredDiskFiles, int64_t memoryProfileThreshold) {
|
||||
Promise<Void> recoveredDiskFiles, int64_t memoryProfileThreshold,
|
||||
std::string _coordFolder, std::string whitelistBinPaths) {
|
||||
state PromiseStream< ErrorInfo > errors;
|
||||
state Reference<AsyncVar<Optional<DataDistributorInterface>>> ddInterf( new AsyncVar<Optional<DataDistributorInterface>>() );
|
||||
state Reference<AsyncVar<Optional<RatekeeperInterface>>> rkInterf( new AsyncVar<Optional<RatekeeperInterface>>() );
|
||||
|
@ -717,6 +721,7 @@ ACTOR Future<Void> workerServer(
|
|||
// here is no, so that when running with log_version==3, all files should say V=3.
|
||||
state std::map<std::tuple<TLogVersion, KeyValueStoreType::StoreType, TLogSpillType>,
|
||||
std::pair<Future<Void>, PromiseStream<InitializeTLogRequest>>> sharedLogs;
|
||||
state std::string coordFolder = abspath(_coordFolder);
|
||||
|
||||
state WorkerInterface interf( locality );
|
||||
|
||||
|
@ -832,7 +837,7 @@ ACTOR Future<Void> workerServer(
|
|||
auto& logData = sharedLogs[std::make_tuple(s.tLogOptions.version, s.storeType, s.tLogOptions.spillType)];
|
||||
// FIXME: Shouldn't if logData.first isValid && !isReady, shouldn't we
|
||||
// be sending a fake InitializeTLogRequest rather than calling tLog() ?
|
||||
Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.first.isValid() || logData.first.isReady() ? logData.second : PromiseStream<InitializeTLogRequest>(), s.storeID, true, oldLog, recovery, degraded );
|
||||
Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.first.isValid() || logData.first.isReady() ? logData.second : PromiseStream<InitializeTLogRequest>(), s.storeID, true, oldLog, recovery, folder, degraded );
|
||||
recoveries.push_back(recovery.getFuture());
|
||||
|
||||
tl = handleIOErrors( tl, kv, s.storeID );
|
||||
|
@ -989,7 +994,7 @@ ACTOR Future<Void> workerServer(
|
|||
filesClosed.add( data->onClosed() );
|
||||
filesClosed.add( queue->onClosed() );
|
||||
|
||||
logData.first = tLogFn( data, queue, dbInfo, locality, logData.second, logId, false, Promise<Void>(), Promise<Void>(), degraded );
|
||||
logData.first = tLogFn( data, queue, dbInfo, locality, logData.second, logId, false, Promise<Void>(), Promise<Void>(), folder, degraded );
|
||||
logData.first = handleIOErrors( logData.first, data, logId );
|
||||
logData.first = handleIOErrors( logData.first, queue, logId );
|
||||
errorForwarders.add( forwardError( errors, Role::SHARED_TRANSACTION_LOG, logId, logData.first ) );
|
||||
|
@ -1053,7 +1058,7 @@ ACTOR Future<Void> workerServer(
|
|||
|
||||
//printf("Recruited as masterProxyServer\n");
|
||||
errorForwarders.add( zombie(recruited, forwardError( errors, Role::MASTER_PROXY, recruited.id(),
|
||||
masterProxyServer( recruited, req, dbInfo ) ) ) );
|
||||
masterProxyServer( recruited, req, dbInfo, whitelistBinPaths ) ) ) );
|
||||
req.reply.send(recruited);
|
||||
}
|
||||
when( InitializeResolverRequest req = waitNext(interf.resolver.getFuture()) ) {
|
||||
|
@ -1166,6 +1171,25 @@ ACTOR Future<Void> workerServer(
|
|||
systemMonitor();
|
||||
loggingTrigger = delay( loggingDelay, TaskFlushTrace );
|
||||
}
|
||||
when(state ExecuteRequest req = waitNext(interf.execReq.getFuture())) {
|
||||
state ExecCmdValueString execArg(req.execPayload);
|
||||
try {
|
||||
int err = wait(execHelper(&execArg, coordFolder, "role=coordinator"));
|
||||
StringRef uidStr = execArg.getBinaryArgValue(LiteralStringRef("uid"));
|
||||
auto tokenStr = "ExecTrace/Coordinators/" + uidStr.toString();
|
||||
auto te = TraceEvent("ExecTraceCoordinators");
|
||||
te.detail("Uid", uidStr.toString());
|
||||
te.detail("Status", err);
|
||||
te.detail("Role", "coordinator");
|
||||
te.detail("Value", coordFolder);
|
||||
te.detail("ExecPayload", execArg.getCmdValueString().toString());
|
||||
te.trackLatest(tokenStr.c_str());
|
||||
req.reply.send(Void());
|
||||
} catch (Error& e) {
|
||||
TraceEvent("ExecHelperError").error(e);
|
||||
req.reply.sendError(broken_promise());
|
||||
}
|
||||
}
|
||||
when( wait( errorForwarders.getResult() ) ) {}
|
||||
when( wait( handleErrors ) ) {}
|
||||
}
|
||||
|
@ -1317,12 +1341,16 @@ ACTOR Future<Void> fdbd(
|
|||
int64_t memoryLimit,
|
||||
std::string metricsConnFile,
|
||||
std::string metricsPrefix,
|
||||
int64_t memoryProfileThreshold)
|
||||
int64_t memoryProfileThreshold,
|
||||
std::string whitelistBinPaths)
|
||||
{
|
||||
try {
|
||||
|
||||
ServerCoordinators coordinators( connFile );
|
||||
TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder);
|
||||
if (g_network->isSimulated()) {
|
||||
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
|
||||
}
|
||||
TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder).detail("WhiteListBinPath", whitelistBinPaths);
|
||||
|
||||
// SOMEDAY: start the services on the machine in a staggered fashion in simulation?
|
||||
state vector<Future<Void>> v;
|
||||
|
@ -1344,7 +1372,7 @@ ACTOR Future<Void> fdbd(
|
|||
v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") );
|
||||
v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") );
|
||||
v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") );
|
||||
v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold), "WorkerServer", UID(), &normalWorkerErrors()) );
|
||||
v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder, whitelistBinPaths), "WorkerServer", UID(), &normalWorkerErrors()) );
|
||||
state Future<Void> firstConnect = reportErrors( printOnFirstConnected(ci), "ClusterFirstConnectedError" );
|
||||
|
||||
wait( quorum(v,1) );
|
||||
|
|
|
@ -102,13 +102,13 @@ struct CycleWorkload : TestWorkload {
|
|||
try {
|
||||
// Reverse next and next^2 node
|
||||
Optional<Value> v = wait( tr.get( self->key(r) ) );
|
||||
if (!v.present()) self->badRead("r", r, tr);
|
||||
if (!v.present()) self->badRead("KeyR", r, tr);
|
||||
state int r2 = self->fromValue(v.get());
|
||||
Optional<Value> v2 = wait( tr.get( self->key(r2) ) );
|
||||
if (!v2.present()) self->badRead("r2", r2, tr);
|
||||
if (!v2.present()) self->badRead("KeyR2", r2, tr);
|
||||
state int r3 = self->fromValue(v2.get());
|
||||
Optional<Value> v3 = wait( tr.get( self->key(r3) ) );
|
||||
if (!v3.present()) self->badRead("r3", r3, tr);
|
||||
if (!v3.present()) self->badRead("KeyR3", r3, tr);
|
||||
int r4 = self->fromValue(v3.get());
|
||||
|
||||
tr.clear( self->key(r) ); //< Shouldn't have an effect, but will break with wrong ordering
|
||||
|
|
|
@ -34,12 +34,14 @@ struct SaveAndKillWorkload : TestWorkload {
|
|||
|
||||
std::string restartInfo;
|
||||
double testDuration;
|
||||
int isRestoring;
|
||||
|
||||
SaveAndKillWorkload(WorkloadContext const& wcx)
|
||||
: TestWorkload(wcx)
|
||||
{
|
||||
restartInfo = getOption( options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini") ).toString();
|
||||
testDuration = getOption( options, LiteralStringRef("testDuration"), 10.0 );
|
||||
isRestoring = getOption( options, LiteralStringRef("isRestoring"), 0 );
|
||||
}
|
||||
|
||||
virtual std::string description() { return "SaveAndKillWorkload"; }
|
||||
|
@ -59,6 +61,7 @@ struct SaveAndKillWorkload : TestWorkload {
|
|||
ini.SetUnicode();
|
||||
ini.LoadFile(self->restartInfo.c_str());
|
||||
|
||||
ini.SetValue("RESTORE", "isRestoring", format("%d", self->isRestoring).c_str());
|
||||
ini.SetValue("META", "processesPerMachine", format("%d", g_simulator.processesPerMachine).c_str());
|
||||
ini.SetValue("META", "listenersPerProcess", format("%d", g_simulator.listenersPerProcess).c_str());
|
||||
ini.SetValue("META", "desiredCoordinators", format("%d", g_simulator.desiredCoordinators).c_str());
|
||||
|
|
|
@ -0,0 +1,367 @@
|
|||
#include <boost/lexical_cast.hpp>
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/ReadYourWrites.h"
|
||||
#include "fdbrpc/ContinuousSample.h"
|
||||
#include "fdbmonitor/SimpleIni.h"
|
||||
#include "fdbserver/ClusterRecruitmentInterface.h"
|
||||
#include "fdbserver/Status.h"
|
||||
#include "fdbserver/TesterInterface.actor.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "fdbserver/workloads/BulkSetup.actor.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "flow/actorcompiler.h"
|
||||
|
||||
void getVersionAndnumTags(TraceEventFields md, Version& version, int& numTags) {
|
||||
version = -1;
|
||||
numTags = -1;
|
||||
|
||||
version = boost::lexical_cast<int64_t>(md.getValue("Version"));
|
||||
numTags = boost::lexical_cast<int>(md.getValue("NumTags"));
|
||||
}
|
||||
|
||||
void getTagAndDurableVersion(TraceEventFields md, Version version, Tag& tag, Version& durableVersion) {
|
||||
Version verifyVersion;
|
||||
durableVersion = -1;
|
||||
|
||||
verifyVersion = boost::lexical_cast<int64_t>(md.getValue("Version"));
|
||||
std::string tagString = md.getValue("Tag");
|
||||
int colon = tagString.find_first_of(':');
|
||||
std::string localityString = tagString.substr(0, colon);
|
||||
std::string idString = tagString.substr(colon + 1);
|
||||
tag.locality = boost::lexical_cast<int>(localityString);
|
||||
tag.id = boost::lexical_cast<int>(idString);
|
||||
|
||||
durableVersion = boost::lexical_cast<int64_t>(md.getValue("DurableVersion"));
|
||||
}
|
||||
|
||||
void getMinAndMaxTLogVersions(TraceEventFields md, Version version, Tag tag, Version& minTLogVersion,
|
||||
Version& maxTLogVersion) {
|
||||
Version verifyVersion;
|
||||
Tag verifyTag;
|
||||
minTLogVersion = maxTLogVersion = -1;
|
||||
|
||||
verifyVersion = boost::lexical_cast<int64_t>(md.getValue("Version"));
|
||||
std::string tagString = md.getValue("Tag");
|
||||
int colon = tagString.find_first_of(':');
|
||||
std::string localityString = tagString.substr(0, colon);
|
||||
std::string idString = tagString.substr(colon + 1);
|
||||
verifyTag.locality = boost::lexical_cast<int>(localityString);
|
||||
verifyTag.id = boost::lexical_cast<int>(idString);
|
||||
if (tag != verifyTag) {
|
||||
return;
|
||||
}
|
||||
minTLogVersion = boost::lexical_cast<int64_t>(md.getValue("PoppedTagVersion"));
|
||||
maxTLogVersion = boost::lexical_cast<int64_t>(md.getValue("QueueCommittedVersion"));
|
||||
}
|
||||
|
||||
void filterEmptyMessages(std::vector<Future<TraceEventFields>>& messages) {
|
||||
messages.erase(std::remove_if(messages.begin(), messages.end(),
|
||||
[](Future<TraceEventFields>const & msgFuture)
|
||||
{
|
||||
return !msgFuture.isReady() || msgFuture.get().size() == 0;
|
||||
}
|
||||
), messages.end());
|
||||
return;
|
||||
}
|
||||
|
||||
void printMessages(std::vector<Future<TraceEventFields>>& messages) {
|
||||
for (int i = 0; i < messages.size(); i++) {
|
||||
TraceEvent("SnapTestMessages").detail("I", i).detail("Value", messages[i].get().toString());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
struct SnapTestWorkload : TestWorkload {
|
||||
public: // variables
|
||||
int numSnaps; // num of snapshots to be taken
|
||||
// FIXME: currently validation works on numSnap = 1
|
||||
double maxSnapDelay; // max delay before which a snapshot will be taken
|
||||
int testID; // test id
|
||||
UID snapUID; // UID used for snap name
|
||||
std::string restartInfoLocation; // file location to store the snap restore info
|
||||
int maxRetryCntToRetrieveMessage; // number of retires to do trackLatest
|
||||
bool skipCheck; // disable check if the exec fails
|
||||
|
||||
public: // ctor & dtor
|
||||
SnapTestWorkload(WorkloadContext const& wcx)
|
||||
: TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), testID(0), snapUID() {
|
||||
TraceEvent("SnapTestWorkload Constructor");
|
||||
std::string workloadName = "SnapTest";
|
||||
maxRetryCntToRetrieveMessage = 10;
|
||||
|
||||
numSnaps = getOption(options, LiteralStringRef("numSnaps"), 0);
|
||||
maxSnapDelay = getOption(options, LiteralStringRef("maxSnapDelay"), 25.0);
|
||||
testID = getOption(options, LiteralStringRef("testID"), 0);
|
||||
restartInfoLocation =
|
||||
getOption(options, LiteralStringRef("restartInfoLocation"), LiteralStringRef("simfdb/restartInfo.ini"))
|
||||
.toString();
|
||||
skipCheck = false;
|
||||
}
|
||||
|
||||
public: // workload functions
|
||||
std::string description() override { return "SnapTest"; }
|
||||
Future<Void> setup(Database const& cx) override {
|
||||
TraceEvent("SnapTestWorkloadSetup");
|
||||
return Void();
|
||||
}
|
||||
Future<Void> start(Database const& cx) override {
|
||||
TraceEvent("SnapTestWorkloadStart");
|
||||
if (clientId == 0) {
|
||||
return _start(cx, this);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<bool> _check(Database cx, SnapTestWorkload* self) {
|
||||
if (self->skipCheck) {
|
||||
TraceEvent(SevWarnAlways, "SnapCheckIgnored");
|
||||
return true;
|
||||
}
|
||||
state Transaction tr(cx);
|
||||
// read the key SnapFailedTLog.$UID
|
||||
loop {
|
||||
try {
|
||||
Standalone<StringRef> keyStr = snapTestFailStatus.withSuffix(StringRef(self->snapUID.toString()));
|
||||
TraceEvent("TestKeyStr").detail("Value", keyStr);
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
Optional<Value> val = wait(tr.get(keyStr));
|
||||
if (val.present()) {
|
||||
break;
|
||||
}
|
||||
// wait for the key to be written out by TLogs
|
||||
wait(delay(0.1));
|
||||
} catch (Error &e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Future<bool> check(Database const& cx) override {
|
||||
TraceEvent("SnapTestWorkloadCheck").detail("ClientID", clientId);
|
||||
if (clientId != 0) {
|
||||
return true;
|
||||
}
|
||||
if (this->testID != 5 && this->testID != 6) {
|
||||
return true;
|
||||
}
|
||||
return _check(cx, this);
|
||||
}
|
||||
|
||||
void getMetrics(vector<PerfMetric>& m) override { TraceEvent("SnapTestWorkloadGetMetrics"); }
|
||||
|
||||
ACTOR Future<Void> snapExecHelper(SnapTestWorkload* self, Database cx, StringRef keyRef, StringRef valueRef) {
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
tr.execute(keyRef, valueRef);
|
||||
wait(tr.commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
try {
|
||||
wait(tr.onError(e));
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_cluster_not_fully_recovered
|
||||
|| e.code() == error_code_txn_exec_log_anti_quorum) {
|
||||
TraceEvent(SevWarnAlways, "ClusterNotFullyRecovered");
|
||||
self->skipCheck = true;
|
||||
break;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> _create_keys(Database cx, std::string prefix, bool even = true) {
|
||||
state Transaction tr(cx);
|
||||
state vector<int64_t> keys;
|
||||
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
keys.push_back(deterministicRandom()->randomInt64(0, INT64_MAX - 2));
|
||||
}
|
||||
|
||||
state int retry = 0;
|
||||
tr.reset();
|
||||
loop {
|
||||
try {
|
||||
for (auto id : keys) {
|
||||
if (even) {
|
||||
if (id % 2 != 0) {
|
||||
id++;
|
||||
}
|
||||
} else {
|
||||
if (id % 2 == 0) {
|
||||
id++;
|
||||
}
|
||||
}
|
||||
std::string Key1 = prefix + std::to_string(id);
|
||||
Key key1Ref(Key1);
|
||||
std::string Val1 = std::to_string(id);
|
||||
Value val1Ref(Val1);
|
||||
tr.set(key1Ref, val1Ref, false);
|
||||
}
|
||||
wait(tr.commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> _start(Database cx, SnapTestWorkload* self) {
|
||||
state Transaction tr(cx);
|
||||
|
||||
if (self->testID == 0) {
|
||||
// create even keys before the snapshot
|
||||
wait(self->_create_keys(cx, "snapKey"));
|
||||
} else if (self->testID == 1) {
|
||||
// create a snapshot
|
||||
state double toDelay = fmod(deterministicRandom()->randomUInt32(), self->maxSnapDelay);
|
||||
TraceEvent("ToDelay").detail("Value", toDelay);
|
||||
ASSERT(toDelay < self->maxSnapDelay);
|
||||
wait(delay(toDelay));
|
||||
|
||||
state int retry = 0;
|
||||
state bool snapFailed = false;
|
||||
loop {
|
||||
self->snapUID = deterministicRandom()->randomUniqueID();
|
||||
try {
|
||||
StringRef snapCmdRef = LiteralStringRef("/bin/snap_create.sh");
|
||||
Future<Void> status = snapCreate(cx, snapCmdRef, self->snapUID);
|
||||
wait(status);
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_cluster_not_fully_recovered ||
|
||||
e.code() == error_code_txn_exec_log_anti_quorum) {
|
||||
++retry;
|
||||
if (retry > 3) {
|
||||
snapFailed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
CSimpleIni ini;
|
||||
ini.SetUnicode();
|
||||
ini.LoadFile(self->restartInfoLocation.c_str());
|
||||
std::string uidStr = self->snapUID.toString();
|
||||
ini.SetValue("RESTORE", "RestoreSnapUID", uidStr.c_str());
|
||||
ini.SetValue("RESTORE", "BackupFailed", format("%d", snapFailed).c_str());
|
||||
ini.SaveFile(self->restartInfoLocation.c_str());
|
||||
// write the snapUID to a file
|
||||
TraceEvent("SnapshotCreateStatus").detail("Status", !snapFailed ? "Success" : "Failure");
|
||||
} else if (self->testID == 2) {
|
||||
// create odd keys after the snapshot
|
||||
wait(self->_create_keys(cx, "snapKey", false /*even*/));
|
||||
} else if (self->testID == 3) {
|
||||
CSimpleIni ini;
|
||||
ini.SetUnicode();
|
||||
ini.LoadFile(self->restartInfoLocation.c_str());
|
||||
bool backupFailed = atoi(ini.GetValue("RESTORE", "BackupFailed"));
|
||||
if (backupFailed) {
|
||||
// since backup failed, skip the restore checking
|
||||
TraceEvent(SevWarnAlways, "BackupFailedSkippingRestoreCheck");
|
||||
return Void();
|
||||
}
|
||||
state KeySelector begin = firstGreaterOrEqual(normalKeys.begin);
|
||||
state KeySelector end = firstGreaterOrEqual(normalKeys.end);
|
||||
state int cnt = 0;
|
||||
// read the entire normalKeys range and look at keys prefixed
|
||||
// with snapKeys 1) validate that all key ids are even ie -
|
||||
// created before snap 2) values are same as the key id 3) # of
|
||||
// keys adds up to the total keys created before snap
|
||||
tr.reset();
|
||||
loop {
|
||||
try {
|
||||
Standalone<RangeResultRef> kvRange = wait(tr.getRange(begin, end, 1000));
|
||||
if (!kvRange.more && kvRange.size() == 0) {
|
||||
TraceEvent("SnapTestNoMoreEntries");
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 0; i < kvRange.size(); i++) {
|
||||
if (kvRange[i].key.startsWith(LiteralStringRef("snapKey"))) {
|
||||
std::string tmp1 = kvRange[i].key.substr(7).toString();
|
||||
int64_t id = strtol(tmp1.c_str(), nullptr, 0);
|
||||
if (id % 2 != 0) {
|
||||
throw operation_failed();
|
||||
}
|
||||
++cnt;
|
||||
std::string tmp2 = kvRange[i].value.toString();
|
||||
int64_t value = strtol(tmp2.c_str(), nullptr, 0);
|
||||
if (id != value) {
|
||||
throw operation_failed();
|
||||
}
|
||||
}
|
||||
}
|
||||
begin = firstGreaterThan(kvRange.end()[-1].key);
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
if (cnt != 1000) {
|
||||
TraceEvent(SevError, "SnapTestVerifyCntValue").detail("Value", cnt);
|
||||
throw operation_failed();
|
||||
}
|
||||
} else if (self->testID == 4) {
|
||||
// description: if disable of a TLog pop was not followed by a
|
||||
// corresponding enable, then TLog will automatically enable the
|
||||
// popping of TLogs. this test case validates that we auto
|
||||
// enable the popping of TLogs
|
||||
state Standalone<StringRef> payLoadRef = LiteralStringRef("empty-binary:uid=a36b2ca0e8dab0452ac3e12b6b926f4b");
|
||||
wait(self->snapExecHelper(self, cx, execDisableTLogPop, payLoadRef));
|
||||
} else if (self->testID == 5) {
|
||||
// snapshot create without disabling pop of the TLog
|
||||
StringRef uidStr = LiteralStringRef("d78b08d47f341158e9a54d4baaf4a4dd");
|
||||
self->snapUID = UID::fromString(uidStr.toString());
|
||||
state Standalone<StringRef> snapPayload = LiteralStringRef("/bin/"
|
||||
"snap_create.sh:uid=").withSuffix(uidStr);
|
||||
wait(self->snapExecHelper(self, cx, execSnap, snapPayload));
|
||||
} else if (self->testID == 6) {
|
||||
// disable popping of TLog and snapshot create with mis-matching
|
||||
payLoadRef = LiteralStringRef("empty-binary:uid=f49d27ddf7a28b6549d930743e0ebdbe");
|
||||
wait(self->snapExecHelper(self, cx, execDisableTLogPop, payLoadRef));
|
||||
if (self->skipCheck) {
|
||||
return Void();
|
||||
}
|
||||
|
||||
StringRef uidStr = LiteralStringRef("ba61e9612a561d60bd83ad83e1b63568");
|
||||
self->snapUID = UID::fromString(uidStr.toString());
|
||||
snapPayload = LiteralStringRef("/bin/snap_create.sh:uid=").withSuffix(uidStr);
|
||||
wait(self->snapExecHelper(self, cx, execSnap, snapPayload));
|
||||
} else if (self->testID == 7) {
|
||||
// create a snapshot with a non whitelisted binary path and operation
|
||||
// should fail
|
||||
state bool testedFailure = false;
|
||||
snapFailed = false;
|
||||
loop {
|
||||
self->snapUID = deterministicRandom()->randomUniqueID();
|
||||
try {
|
||||
StringRef snapCmdRef = LiteralStringRef("/bin/snap_create1.sh");
|
||||
Future<Void> status = snapCreate(cx, snapCmdRef, self->snapUID);
|
||||
wait(status);
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_cluster_not_fully_recovered ||
|
||||
e.code() == error_code_txn_exec_log_anti_quorum) {
|
||||
snapFailed = true;
|
||||
break;
|
||||
}
|
||||
if (e.code() == error_code_transaction_not_permitted) {
|
||||
testedFailure = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT(testedFailure || snapFailed);
|
||||
}
|
||||
wait(delay(0.0));
|
||||
return Void();
|
||||
}
|
||||
};
|
||||
|
||||
WorkloadFactory<SnapTestWorkload> SnapTestWorkloadFactory("SnapTest");
|
|
@ -65,6 +65,9 @@ ERROR( lookup_failed, 1041, "DNS lookup failed" )
|
|||
ERROR( proxy_memory_limit_exceeded, 1042, "Proxy commit memory limit exceeded" )
|
||||
ERROR( shutdown_in_progress, 1043, "Operation no longer supported due to shutdown" )
|
||||
ERROR( serialization_failed, 1044, "Failed to deserialize an object" )
|
||||
ERROR( transaction_not_permitted, 1045, "Operation not permitted")
|
||||
ERROR( cluster_not_fully_recovered, 1046, "Cluster not fully recovered")
|
||||
ERROR( txn_exec_log_anti_quorum, 1047, "Execute Transaction not supported when log anti quorum is configured")
|
||||
|
||||
ERROR( broken_promise, 1100, "Broken promise" )
|
||||
ERROR( operation_cancelled, 1101, "Asynchronous operation cancelled" )
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
# -*- mode: makefile; -*-
|
||||
|
||||
flow_CFLAGS := -I$(BOOSTDIR) -I. -DUSE_UCONTEXT
|
||||
flow_CFLAGS := -isystem$(BOOSTDIR) -I. -DUSE_UCONTEXT
|
||||
flow_LDFLAGS :=
|
||||
|
||||
ifeq ($(PLATFORM),osx)
|
||||
|
|
|
@ -108,6 +108,7 @@ add_fdb_test(TEST_FILES fast/RandomUnitTests.txt)
|
|||
add_fdb_test(TEST_FILES fast/SelectorCorrectness.txt)
|
||||
add_fdb_test(TEST_FILES fast/Sideband.txt)
|
||||
add_fdb_test(TEST_FILES fast/SidebandWithStatus.txt)
|
||||
add_fdb_test(TEST_FILES fast/SnapTestFailAndDisablePop.txt)
|
||||
add_fdb_test(TEST_FILES fast/SwizzledRollbackSideband.txt)
|
||||
add_fdb_test(TEST_FILES fast/SystemRebootTestCycle.txt)
|
||||
add_fdb_test(TEST_FILES fast/TaskBucketCorrectness.txt)
|
||||
|
@ -141,6 +142,18 @@ add_fdb_test(
|
|||
add_fdb_test(
|
||||
TEST_FILES restarting/StorefrontTestRestart-1.txt
|
||||
restarting/StorefrontTestRestart-2.txt)
|
||||
add_fdb_test(
|
||||
TEST_FILES restarting/from_6.2.0/SnapTestSimpleRestart-1.txt
|
||||
restarting/from_6.2.0/SnapTestSimpleRestart-2.txt)
|
||||
add_fdb_test(
|
||||
TEST_FILES restarting/from_6.2.0/SnapTestRestart-1.txt
|
||||
restarting/from_6.2.0/SnapTestRestart-2.txt)
|
||||
add_fdb_test(
|
||||
TEST_FILES restarting/from_6.2.0/SnapCycleRestart-1.txt
|
||||
restarting/from_6.2.0/SnapCycleRestart-2.txt)
|
||||
add_fdb_test(
|
||||
TEST_FILES restarting/from_6.2.0/SnapTestAttrition-1.txt
|
||||
restarting/from_6.2.0/SnapTestAttrition-2.txt)
|
||||
add_fdb_test(
|
||||
TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt
|
||||
restarting/from_5.1.7/DrUpgradeRestart-2.txt IGNORE)
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
; verify that the TLog popping disable times out and switches to enable mode
|
||||
; automatically, if not enabled specifically
|
||||
testTitle=SnapTLogPopDisableTimeout
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=4
|
||||
|
||||
; snapCreate without TLogPopDisable
|
||||
testTitle=SnapCreateWithNoDisablePop
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=5
|
||||
|
||||
; snapCreate and tlogPopDisable with mis-matched UID
|
||||
testTitle=SnapCreateDisableTLogPopMismatch
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=6
|
||||
|
||||
; snapCreate with binary path that is not whitelisted
|
||||
testTitle=SnapCreateNotWhitelistedBinaryPath
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=7
|
|
@ -0,0 +1,21 @@
|
|||
testTitle=SnapCyclePre
|
||||
;Take snap and do cycle test
|
||||
clearAfterTest=false
|
||||
testName=Cycle
|
||||
transactionsPerSecond=2500.0
|
||||
nodeCount=2500
|
||||
testDuration=10.0
|
||||
expectedRate=0
|
||||
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=10.0
|
||||
testID=1
|
||||
clearAfterTest=false
|
||||
|
||||
testTitle=SnapCycleShutdown
|
||||
;save and shutdown
|
||||
testName=SaveAndKill
|
||||
restartInfoLocation=simfdb/restartInfo.ini
|
||||
testDuration=10.0
|
||||
isRestoring=1
|
|
@ -0,0 +1,8 @@
|
|||
testTitle=SnapCycleRestore
|
||||
;Post snap restore test
|
||||
runSetup=false
|
||||
testName=Cycle
|
||||
transactionsPerSecond=2500.0
|
||||
nodeCount=2500
|
||||
testDuration=10.0
|
||||
expectedRate=0
|
|
@ -0,0 +1,45 @@
|
|||
testTitle=SnapTestPre
|
||||
;write 1000 Keys ending with even numbers
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=0
|
||||
clearAfterTest=false
|
||||
|
||||
testTitle=SnapTestTakeSnap
|
||||
;Take snap and do read/write
|
||||
testName=ReadWrite
|
||||
testDuration=10.0
|
||||
transactionsPerSecond=10000
|
||||
writesPerTransactionA=0
|
||||
readsPerTransactionA=10
|
||||
writesPerTransactionB=10
|
||||
readsPerTransactionB=1
|
||||
alpha=0.5
|
||||
nodeCount=100000
|
||||
valueBytes=16
|
||||
discardEdgeMeasurements=false
|
||||
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=10.0
|
||||
testID=1
|
||||
clearAfterTest=false
|
||||
|
||||
testName=Attrition
|
||||
testDuration=10.0
|
||||
|
||||
testTitle=SnapTestPost
|
||||
;write 1000 Keys ending with odd numbers
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=25.0
|
||||
testID=2
|
||||
clearAfterTest=false
|
||||
|
||||
; save and shutdown
|
||||
testTitle=SnapSimpleShutdown
|
||||
testName=SaveAndKill
|
||||
restartInfoLocation=simfdb/restartInfo.ini
|
||||
testDuration=10.0
|
||||
isRestoring=1
|
|
@ -0,0 +1,7 @@
|
|||
; verify all keys are even numbered
|
||||
testTitle=SnapTestVerify
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=3
|
||||
restartInfoLocation=simfdb/restartInfo.ini
|
|
@ -0,0 +1,42 @@
|
|||
testTitle=SnapTestPre
|
||||
;write 1000 Keys ending with even numbers
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=0
|
||||
clearAfterTest=false
|
||||
|
||||
testTitle=SnapTestTakeSnap
|
||||
;Take snap and do read/write
|
||||
testName=ReadWrite
|
||||
testDuration=10.0
|
||||
transactionsPerSecond=10000
|
||||
writesPerTransactionA=0
|
||||
readsPerTransactionA=10
|
||||
writesPerTransactionB=10
|
||||
readsPerTransactionB=1
|
||||
alpha=0.5
|
||||
nodeCount=100000
|
||||
valueBytes=16
|
||||
discardEdgeMeasurements=false
|
||||
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=10.0
|
||||
testID=1
|
||||
clearAfterTest=false
|
||||
|
||||
testTitle=SnapTestPost
|
||||
;write 1000 Keys ending with odd numbers
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=25.0
|
||||
testID=2
|
||||
clearAfterTest=false
|
||||
|
||||
testTitle=SnapTestShutdown
|
||||
;save and shutdown
|
||||
testName=SaveAndKill
|
||||
restartInfoLocation=simfdb/restartInfo.ini
|
||||
testDuration=10.0
|
||||
isRestoring=1
|
|
@ -0,0 +1,6 @@
|
|||
; verify all keys are even numbered
|
||||
testTitle=SnapTestVerify
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=3
|
|
@ -0,0 +1,30 @@
|
|||
;write 1000 Keys ending with even number
|
||||
testTitle=SnapSimplePre
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=30.0
|
||||
testID=0
|
||||
clearAfterTest=false
|
||||
|
||||
;take snap
|
||||
testTitle=SnapSimpleTakeSnap
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=5.0
|
||||
testID=1
|
||||
clearAfterTest=false
|
||||
|
||||
;write 1000 Keys ending with odd number
|
||||
testTitle=SnapSimplePost
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=2
|
||||
clearAfterTest=false
|
||||
|
||||
; save and shutdown
|
||||
testTitle=SnapSimpleShutdown
|
||||
testName=SaveAndKill
|
||||
restartInfoLocation=simfdb/restartInfo.ini
|
||||
testDuration=10.0
|
||||
isRestoring=1
|
|
@ -0,0 +1,6 @@
|
|||
; verify all keys are even numbered
|
||||
testTitle=SnapSimpleVerify
|
||||
testName=SnapTest
|
||||
numSnaps=1
|
||||
maxSnapDelay=3.0
|
||||
testID=3
|
Loading…
Reference in New Issue