Merge pull request #7236 from sfc-gh-anoyes/anoyes/revert-5637-main

Revert most logic in #5637
This commit is contained in:
Jingyu Zhou 2022-05-24 18:00:37 -07:00 committed by GitHub
commit 75e9774197
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 62 additions and 48 deletions

View File

@ -2442,10 +2442,15 @@ ACTOR Future<Void> rejoinClusterController(TLogData* self,
// Read and cache cluster ID before displacing this tlog. We want
// to avoid removing the tlog's data if it has joined a new cluster
// with a different cluster ID.
state UID clusterId = wait(getClusterId(self));
ASSERT(clusterId.isValid());
self->ccClusterId = clusterId;
ev.detail("ClusterId", clusterId).detail("SelfClusterId", self->durableClusterId);
// TODO: #5375
/*
state UID clusterId = wait(getClusterId(self));
ASSERT(clusterId.isValid());
self->ccClusterId = clusterId;
ev.detail("ClusterId", clusterId).detail("SelfClusterId", self->durableClusterId);
*/
if (BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFY_WORKER_REMOVED_MAX_LAG * deterministicRandom()->random01()));
throw worker_removed();
@ -3649,30 +3654,35 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
}
}
} catch (Error& e) {
if (e.code() != error_code_worker_removed) {
throw;
}
// Don't need to worry about deleting data if there is no durable
// cluster ID.
if (!self.durableClusterId.isValid()) {
throw;
}
// When a tlog joins a new cluster and has data for an old cluster,
// it should automatically exclude itself to avoid being used in
// the new cluster.
auto recoveryState = self.dbInfo->get().recoveryState;
if (recoveryState == RecoveryState::FULLY_RECOVERED && self.ccClusterId.isValid() &&
self.durableClusterId.isValid() && self.ccClusterId != self.durableClusterId) {
state NetworkAddress address = g_network->getLocalAddress();
wait(excludeServers(self.cx, { AddressExclusion{ address.ip, address.port } }));
TraceEvent(SevWarnAlways, "TLogBelongsToExistingCluster")
.detail("ClusterId", self.durableClusterId)
.detail("NewClusterId", self.ccClusterId);
}
// If the tlog has a valid durable cluster ID, we don't want it to
// wipe its data! Throw this error to signal to `tlogTerminated` to
// close the persistent data store instead of deleting it.
throw invalid_cluster_id();
throw;
// TODO: #5375
/*
if (e.code() != error_code_worker_removed) {
throw;
}
// Don't need to worry about deleting data if there is no durable
// cluster ID.
if (!self.durableClusterId.isValid()) {
throw;
}
// When a tlog joins a new cluster and has data for an old cluster,
// it should automatically exclude itself to avoid being used in
// the new cluster.
auto recoveryState = self.dbInfo->get().recoveryState;
if (recoveryState == RecoveryState::FULLY_RECOVERED && self.ccClusterId.isValid() &&
self.durableClusterId.isValid() && self.ccClusterId != self.durableClusterId) {
state NetworkAddress address = g_network->getLocalAddress();
wait(excludeServers(self.cx, { AddressExclusion{ address.ip, address.port } }));
TraceEvent(SevWarnAlways, "TLogBelongsToExistingCluster")
.detail("ClusterId", self.durableClusterId)
.detail("NewClusterId", self.ccClusterId);
}
// If the tlog has a valid durable cluster ID, we don't want it to
// wipe its data! Throw this error to signal to `tlogTerminated` to
// close the persistent data store instead of deleting it.
throw invalid_cluster_id();
*/
}
} catch (Error& e) {
self.terminated.send(Void());

View File

@ -9435,26 +9435,30 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
if (e.isError()) {
Error e = f.getError();
if (e.code() != error_code_worker_removed) {
throw e;
}
state UID clusterId = wait(getClusterId(&self));
ASSERT(self.clusterId.isValid());
UID durableClusterId = wait(self.clusterId.getFuture());
ASSERT(durableClusterId.isValid());
if (clusterId == durableClusterId) {
throw worker_removed();
}
// When a storage server connects to a new cluster, it deletes its
// old data and creates a new, empty data file for the new cluster.
// We want to avoid this and force a manual removal of the storage
// servers' old data when being assigned to a new cluster to avoid
// accidental data loss.
TraceEvent(SevWarn, "StorageServerBelongsToExistingCluster")
.detail("ServerID", ssi.id())
.detail("ClusterID", durableClusterId)
.detail("NewClusterID", clusterId);
wait(Future<Void>(Never()));
throw e;
// TODO: #5375
/*
if (e.code() != error_code_worker_removed) {
throw e;
}
state UID clusterId = wait(getClusterId(&self));
ASSERT(self.clusterId.isValid());
UID durableClusterId = wait(self.clusterId.getFuture());
ASSERT(durableClusterId.isValid());
if (clusterId == durableClusterId) {
throw worker_removed();
}
// When a storage server connects to a new cluster, it deletes its
// old data and creates a new, empty data file for the new cluster.
// We want to avoid this and force a manual removal of the storage
// servers' old data when being assigned to a new cluster to avoid
// accidental data loss.
TraceEvent(SevWarn, "StorageServerBelongsToExistingCluster")
.detail("ServerID", ssi.id())
.detail("ClusterID", durableClusterId)
.detail("NewClusterID", clusterId);
wait(Future<Void>(Never()));
*/
}
self.interfaceRegistered =