diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 142409deca..c54a50e7a8 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1543,6 +1543,20 @@ public: return result; } + // Given a datacenter ID, returns {the region with that dcId, the last region with any other dcId}. + std::pair getPrimaryAndRemoteRegion(std::vector regions, Key dcId) { + RegionInfo region; + RegionInfo remoteRegion; + for (const auto& r : regions) { + if (r.dcId == dcId) { + region = r; + } else { + remoteRegion = r; + } + } + return std::make_pair(region, remoteRegion); + } + ErrorOr findWorkersForConfigurationFromDC(RecruitFromConfigurationRequest const& req, Optional dcId) { RecruitFromConfigurationReply result; @@ -1555,15 +1569,7 @@ public: primaryDC.insert(dcId); result.dcId = dcId; - RegionInfo region; - RegionInfo remoteRegion; - for (auto& r : req.configuration.regions) { - if (r.dcId == dcId.get()) { - region = r; - } else { - remoteRegion = r; - } - } + auto [region, remoteRegion] = getPrimaryAndRemoteRegion(req.configuration.regions, dcId.get()); if (req.recruitSeedServers) { auto primaryStorageServers = @@ -2008,67 +2014,82 @@ public: RecruitFromConfigurationReply findWorkersForConfiguration(RecruitFromConfigurationRequest const& req) { RecruitFromConfigurationReply rep = findWorkersForConfigurationDispatch(req); if (g_network->isSimulated()) { - RecruitFromConfigurationReply compare = findWorkersForConfigurationDispatch(req); + // FIXME: The logic to pick a satellite in a remote region is not + // deterministic and can therefore break this determinism check. + // Since satellites will generally be in the primary region, skip the + // check when the remote region's DC is listed as a primary satellite. 
+ bool remoteDCUsedAsSatellite = false; + if (req.configuration.regions.size() > 1) { + auto [region, remoteRegion] = getPrimaryAndRemoteRegion(req.configuration.regions, req.configuration.regions[0].dcId); + for (const auto& satellite : region.satellites) { + if (satellite.dcId == remoteRegion.dcId) { + remoteDCUsedAsSatellite = true; + } + } + } + if (!remoteDCUsedAsSatellite) { + RecruitFromConfigurationReply compare = findWorkersForConfigurationDispatch(req); - std::map>, int> firstUsed; - std::map>, int> secondUsed; - updateKnownIds(&firstUsed); - updateKnownIds(&secondUsed); + std::map>, int> firstUsed; + std::map>, int> secondUsed; + updateKnownIds(&firstUsed); + updateKnownIds(&secondUsed); - // auto mworker = id_worker.find(masterProcessId); - //TraceEvent("CompareAddressesMaster") - // .detail("Master", - // mworker != id_worker.end() ? mworker->second.details.interf.address() : NetworkAddress()); + // auto mworker = id_worker.find(masterProcessId); + //TraceEvent("CompareAddressesMaster") + // .detail("Master", + // mworker != id_worker.end() ? 
mworker->second.details.interf.address() : NetworkAddress()); - updateIdUsed(rep.tLogs, firstUsed); - updateIdUsed(compare.tLogs, secondUsed); - compareWorkers( - req.configuration, rep.tLogs, firstUsed, compare.tLogs, secondUsed, ProcessClass::TLog, "TLog"); - updateIdUsed(rep.satelliteTLogs, firstUsed); - updateIdUsed(compare.satelliteTLogs, secondUsed); - compareWorkers(req.configuration, - rep.satelliteTLogs, - firstUsed, - compare.satelliteTLogs, - secondUsed, - ProcessClass::TLog, - "Satellite"); - updateIdUsed(rep.commitProxies, firstUsed); - updateIdUsed(compare.commitProxies, secondUsed); - updateIdUsed(rep.grvProxies, firstUsed); - updateIdUsed(compare.grvProxies, secondUsed); - updateIdUsed(rep.resolvers, firstUsed); - updateIdUsed(compare.resolvers, secondUsed); - compareWorkers(req.configuration, - rep.commitProxies, - firstUsed, - compare.commitProxies, - secondUsed, - ProcessClass::CommitProxy, - "CommitProxy"); - compareWorkers(req.configuration, - rep.grvProxies, - firstUsed, - compare.grvProxies, - secondUsed, - ProcessClass::GrvProxy, - "GrvProxy"); - compareWorkers(req.configuration, - rep.resolvers, - firstUsed, - compare.resolvers, - secondUsed, - ProcessClass::Resolver, - "Resolver"); - updateIdUsed(rep.backupWorkers, firstUsed); - updateIdUsed(compare.backupWorkers, secondUsed); - compareWorkers(req.configuration, - rep.backupWorkers, - firstUsed, - compare.backupWorkers, - secondUsed, - ProcessClass::Backup, - "Backup"); + updateIdUsed(rep.tLogs, firstUsed); + updateIdUsed(compare.tLogs, secondUsed); + compareWorkers( + req.configuration, rep.tLogs, firstUsed, compare.tLogs, secondUsed, ProcessClass::TLog, "TLog"); + updateIdUsed(rep.satelliteTLogs, firstUsed); + updateIdUsed(compare.satelliteTLogs, secondUsed); + compareWorkers(req.configuration, + rep.satelliteTLogs, + firstUsed, + compare.satelliteTLogs, + secondUsed, + ProcessClass::TLog, + "Satellite"); + updateIdUsed(rep.commitProxies, firstUsed); + updateIdUsed(compare.commitProxies, 
secondUsed); + updateIdUsed(rep.grvProxies, firstUsed); + updateIdUsed(compare.grvProxies, secondUsed); + updateIdUsed(rep.resolvers, firstUsed); + updateIdUsed(compare.resolvers, secondUsed); + compareWorkers(req.configuration, + rep.commitProxies, + firstUsed, + compare.commitProxies, + secondUsed, + ProcessClass::CommitProxy, + "CommitProxy"); + compareWorkers(req.configuration, + rep.grvProxies, + firstUsed, + compare.grvProxies, + secondUsed, + ProcessClass::GrvProxy, + "GrvProxy"); + compareWorkers(req.configuration, + rep.resolvers, + firstUsed, + compare.resolvers, + secondUsed, + ProcessClass::Resolver, + "Resolver"); + updateIdUsed(rep.backupWorkers, firstUsed); + updateIdUsed(compare.backupWorkers, secondUsed); + compareWorkers(req.configuration, + rep.backupWorkers, + firstUsed, + compare.backupWorkers, + secondUsed, + ProcessClass::Backup, + "Backup"); + } } return rep; }