@ -288,353 +288,331 @@ struct ConnectPacket {
ACTOR static Future<Void> connectionReader(TransportData* transport, Reference<IConnection> conn, Reference<struct Peer> peer,
ACTOR static Future<Void> connectionReader(TransportData* transport, Reference<IConnection> conn, Reference<struct Peer> peer,
Promise<Reference<struct Peer>> onConnected);
Promise<Reference<struct Peer>> onConnected);
static PacketID sendPacket( TransportData* self, ISerializeSource const& what, const Endpoint& destination, bool reliable, bool openConnection );
static void sendLocal( TransportData* self, ISerializeSource const& what, const Endpoint& destination );
static ReliablePacket* sendPacket( TransportData* self, Reference<Peer> peer, ISerializeSource const& what, const Endpoint& destination, bool reliable );
struct Peer : public ReferenceCounted<Peer> {
ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
TransportData* transport;
state Endpoint remotePingEndpoint({ peer->destination }, WLTOKEN_PING_PACKET);
NetworkAddress destination;
loop {
UnsentPacketQueue unsent;
if (!FlowTransport::transport().isClient() && !peer->destination.isPublic() && peer->compatible) {
ReliablePacketList reliable;
// Don't send ping messages to clients unless necessary. Instead monitor incoming client pings.
AsyncTrigger dataToSend; // Triggered when unsent.empty() becomes false
state double lastRefreshed = now();
Future<Void> connect;
state int64_t lastBytesReceived = peer->bytesReceived;
AsyncTrigger resetPing;
bool compatible;
bool outgoingConnectionIdle; // We don't actually have a connection open and aren't trying to open one because we don't have anything to send
double lastConnectTime;
double reconnectionDelay;
int peerReferences;
bool incompatibleProtocolVersionNewer;
int64_t bytesReceived;
double lastDataPacketSentTime;
explicit Peer(TransportData* transport, NetworkAddress const& destination)
: transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0),
reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true),
incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()) {}
// Registers a freshly written packet with this peer.
//   pb          - packet buffer for the packet. NOTE(review): not referenced in the lines visible here.
//   rp          - reliable-packet record, or null for an unreliable send; non-null records are
//                 inserted into `reliable`.
//   firstUnsent - true when the caller observed the unsent queue to be empty before writing;
//                 in that case `dataToSend` is triggered.
void send(PacketBuffer* pb, ReliablePacket* rp, bool firstUnsent) {
if (rp) reliable.insert(rp);
if (firstUnsent) dataToSend.trigger();
// Builds the ConnectPacket for a new connection and places it at the front of the unsent queue.
void prependConnectPacket() {
// Send the ConnectPacket expected at the beginning of a new connection
ConnectPacket pkt;
// Advertise the port of whichever local address has the same TLS setting as the destination:
// the primary address first, then the secondary address if one is configured.
if(transport->localAddresses.address.isTLS() == destination.isTLS()) {
pkt.canonicalRemotePort = transport->localAddresses.address.port;
} else if(transport->localAddresses.secondaryAddress.present()) {
pkt.canonicalRemotePort = transport->localAddresses.secondaryAddress.get().port;
} else {
// a "mixed" TLS/non-TLS connection is like a client/server connection - there's no way to reverse it
pkt.canonicalRemotePort = 0;
// The length field counts everything in the packet except the length field itself.
pkt.connectPacketLength = sizeof(pkt) - sizeof(pkt.connectPacketLength);
pkt.protocolVersion = currentProtocolVersion;
pkt.connectionId = transport->transportId;
// The packet goes into its own fresh buffer, which is then prepended to `unsent`.
// NOTE(review): the statement that serializes `pkt` into `wr` is not visible here - confirm.
PacketBuffer* pb_first = PacketBuffer::create();
PacketWriter wr( pb_first, nullptr, Unversioned() );
unsent.prependWriteBuffer(pb_first, wr.finish());
// Drops queued unreliable data while keeping reliable packets queued for sending.
// NOTE(review): only the reliable-packet branch is visible here; the statement that discards the
// current unsent list (described by the comment below) is not shown - confirm against the full source.
void discardUnreliablePackets() {
// Throw away the current unsent list, dropping the reference count on each PacketBuffer that accounts for presence in the unsent list
// If there are reliable packets, compact reliable packets into a new unsent range
if(!reliable.empty()) {
// Compact the reliable packets starting at the unsent queue's current write buffer.
PacketBuffer* pb = unsent.getWriteBuffer();
pb = reliable.compact(pb, nullptr);
// Decides whether a connection initiated by the remote side replaces this peer's current connection.
//   self   - reference to this peer, passed on to connectionKeeper.
//   conn   - the incoming connection.
//   reader - future passed on to connectionKeeper as its `reader` argument.
// Throws address_in_use when the peer is not public and its connection is not idle.
void onIncomingConnection( Reference<Peer> self, Reference<IConnection> conn, Future<Void> reader ) {
// In case two processes are trying to connect to each other simultaneously, the process with the larger canonical NetworkAddress
// gets to keep its outgoing connection.
if ( !destination.isPublic() && !outgoingConnectionIdle ) throw address_in_use();
// The tie-break compares against the local address whose TLS setting matches the destination's:
// the secondary address when it is present and matches, otherwise the primary address.
NetworkAddress compatibleAddr = transport->localAddresses.address;
if(transport->localAddresses.secondaryAddress.present() && transport->localAddresses.secondaryAddress.get().isTLS() == destination.isTLS()) {
compatibleAddr = transport->localAddresses.secondaryAddress.get();
// The incoming connection is kept when the peer is not public, when no outgoing connection is
// active, or when the peer's address compares greater than ours.
if ( !destination.isPublic() || outgoingConnectionIdle || destination > compatibleAddr ) {
// Keep the new connection
TraceEvent("IncomingConnection", conn->getDebugID())
.detail("FromAddr", conn->getPeerAddress())
.detail("CanonicalAddr", destination)
.detail("IsPublic", destination.isPublic());
// Overwrites `connect` with a keeper for the new connection.
// NOTE(review): presumably dropping the previous future cancels the old keeper - confirm.
connect = connectionKeeper( self, conn, reader );
} else {
TraceEvent("RedundantConnection", conn->getDebugID())
.detail("FromAddr", conn->getPeerAddress().toString())
.detail("CanonicalAddr", destination)
.detail("LocalAddr", compatibleAddr);
// Keep our prior connection
// Send an (ignored) packet to make sure that, if our outgoing connection died before the peer made this connection attempt,
// we eventually find out that our connection is dead, close it, and then respond to the next connection reattempt from peer.
ACTOR static Future<Void> connectionMonitor( Reference<Peer> peer ) {
state Endpoint remotePingEndpoint({ peer->destination }, WLTOKEN_PING_PACKET);
loop {
if (!FlowTransport::transport().isClient() && !peer->destination.isPublic()) {
// Don't send ping messages to clients unless necessary. Instead monitor incoming client pings.
state double lastRefreshed = now();
state int64_t lastBytesReceived = peer->bytesReceived;
loop {
if (lastBytesReceived < peer->bytesReceived) {
lastRefreshed = now();
lastBytesReceived = peer->bytesReceived;
} else if (lastRefreshed < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT *
// If we have not received anything in this period, client must have closed
// connection by now. Break loop to check if it is still alive by sending a ping.
//We cannot let an error be thrown from connectionMonitor while still on the stack from scanPackets in connectionReader
//because then it would not call the destructor of connectionReader when connectionReader is cancelled.
if (peer->reliable.empty() && peer->unsent.empty()) {
if (peer->peerReferences == 0 &&
// TODO: What about when peerReference == -1?
throw connection_unreferenced();
} else if (FlowTransport::transport().isClient() && peer->compatible && peer->destination.isPublic() &&
(peer->lastConnectTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT) &&
(peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT)) {
// First condition is necessary because we may get here if we are server.
throw connection_idle();
// TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding
state ReplyPromise<Void> reply;
FlowTransport::transport().sendUnreliable( SerializeSource<ReplyPromise<Void>>(reply), remotePingEndpoint );
state int64_t startingBytes = peer->bytesReceived;
state int timeouts = 0;
loop {
loop {
choose {
if (lastBytesReceived < peer->bytesReceived) {
if(startingBytes == peer->bytesReceived) {
lastRefreshed = now();
TraceEvent("ConnectionTimeout").suppressFor(1.0).detail("WithAddr", peer->destination);
lastBytesReceived = peer->bytesReceived;
throw connection_failed();
} else if (lastRefreshed < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT *
if(timeouts > 1) {
// If we have not received anything in this period, client must have closed
TraceEvent(SevWarnAlways, "ConnectionSlowPing")
// connection by now. Break loop to check if it is still alive by sending a ping.
.detail("WithAddr", peer->destination)
.detail("Timeouts", timeouts);
startingBytes = peer->bytesReceived;
when (wait( reply.getFuture() )) {
when (wait( peer->resetPing.onTrigger())) {
ACTOR static Future<Void> connectionWriter( Reference<Peer> self, Reference<IConnection> conn ) {
//We cannot let an error be thrown from connectionMonitor while still on the stack from scanPackets in connectionReader
state double lastWriteTime = now();
//because then it would not call the destructor of connectionReader when connectionReader is cancelled.
loop {
//wait( delay(0, TaskPriority::WriteSocket) );
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskPriority::WriteSocket) );
//wait( delay(500e-6, TaskPriority::WriteSocket) );
//wait( yield(TaskPriority::WriteSocket) );
// Send until there is nothing left to send
if (peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) {
loop {
if (peer->peerReferences == 0 &&
lastWriteTime = now();
// TODO: What about when peerReference == -1?
int sent = conn->write(self->unsent.getUnsent(), /* limit= */ FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
throw connection_unreferenced();
if (sent) {
} else if (FlowTransport::transport().isClient() && peer->compatible && peer->destination.isPublic() &&
self->transport->bytesSent += sent;
(peer->lastConnectTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT) &&
(peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT)) {
// First condition is necessary because we may get here if we are server.
if (self->unsent.empty()) break;
throw connection_idle();
TEST(true); // We didn't write everything, so apparently the write buffer is full. Wait for it to be nonfull.
wait( conn->onWritable() );
wait( yield(TaskPriority::WriteSocket) );
// Wait until there is something to send
while ( self->unsent.empty() )
wait( self->dataToSend.onTrigger() );
ACTOR static Future<Void> connectionKeeper( Reference<Peer> self,
Reference<IConnection> conn = Reference<IConnection>(),
Future<Void> reader = Void()) {
TraceEvent(SevDebug, "ConnectionKeeper", conn ? conn->getDebugID() : UID())
.detail("PeerAddr", self->destination)
.detail("ConnSet", (bool)conn);
// This is used only at client side and is used to override waiting for unsent data to update failure monitoring
// TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding
// status. At client, if an existing connection fails, we retry making a connection and if that fails, then only
state ReplyPromise<Void> reply;
// we report that address as failed.
FlowTransport::transport().sendUnreliable( SerializeSource<ReplyPromise<Void>>(reply), remotePingEndpoint, true );
state bool clientReconnectDelay = false;
state int64_t startingBytes = peer->bytesReceived;
state int timeouts = 0;
loop {
loop {
try {
choose {
if (!conn) { // Always, except for the first loop with an incoming connection
self->outgoingConnectionIdle = true;
if(startingBytes == peer->bytesReceived) {
TraceEvent("ConnectionTimeout").suppressFor(1.0).detail("WithAddr", peer->destination);
// Wait until there is something to send.
while (self->unsent.empty()) {
if (FlowTransport::transport().isClient() && self->destination.isPublic() &&
clientReconnectDelay) {
ASSERT( self->destination.isPublic() );
self->outgoingConnectionIdle = false;
std::max(0.0, self->lastConnectTime + self->reconnectionDelay -
now()))); // Don't connect() to the same peer more than once per 2 sec
self->lastConnectTime = now();
TraceEvent("ConnectingTo", conn ? conn->getDebugID() : UID()).suppressFor(1.0).detail("PeerAddr", self->destination);
Reference<IConnection> _conn = wait( timeout( INetworkConnections::net()->connect(self->destination), FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT, Reference<IConnection>() ) );
if (_conn) {
if (FlowTransport::transport().isClient()) {
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false));
if (self->unsent.empty()) {
clientReconnectDelay = false;
} else {
conn = _conn;
TraceEvent("ConnectionExchangingConnectPacket", conn->getDebugID())
.detail("PeerAddr", self->destination);
} else {
TraceEvent("ConnectionTimedOut", conn ? conn->getDebugID() : UID()).suppressFor(1.0).detail("PeerAddr", self->destination);
if (FlowTransport::transport().isClient()) {
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(true));
throw connection_failed();
throw connection_failed();
if(timeouts > 1) {
reader = connectionReader( self->transport, conn, self, Promise<Reference<Peer>>());
TraceEvent(SevWarnAlways, "ConnectionSlowPing")
} else {
self->outgoingConnectionIdle = false;
.detail("WithAddr", peer->destination)
.detail("Timeouts", timeouts);
try {
wait( connectionWriter( self, conn ) || reader || connectionMonitor(self) );
} catch (Error& e) {
if (e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
e.code() == error_code_connection_unreferenced ||
(g_network->isSimulated() && e.code() == error_code_checksum_failed))
throw e;
ASSERT( false );
} catch (Error& e) {
if(now() - self->lastConnectTime > FLOW_KNOBS->RECONNECTION_RESET_TIME) {
} else {
self->reconnectionDelay = std::min(FLOW_KNOBS->MAX_RECONNECTION_TIME, self->reconnectionDelay * FLOW_KNOBS->RECONNECTION_TIME_GROWTH_RATE);
reader = Future<Void>();
bool ok = e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
e.code() == error_code_connection_unreferenced || e.code() == error_code_connection_idle ||
(g_network->isSimulated() && e.code() == error_code_checksum_failed);
if(self->compatible) {
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID())
.error(e, true)
.detail("PeerAddr", self->destination);
else {
TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed",
conn ? conn->getDebugID() : UID())
.error(e, true)
.detail("PeerAddr", self->destination);
if(self->destination.isPublic() && IFailureMonitor::failureMonitor().getState(self->destination).isAvailable()) {
auto& it = self->transport->closedPeers[self->destination];
it.first = now();
TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID())
.detail("PeerAddr", self->destination);
it.second = now();
startingBytes = peer->bytesReceived;
when (wait( reply.getFuture() )) {
if (conn) {
if (FlowTransport::transport().isClient() && e.code() != error_code_connection_idle) {
clientReconnectDelay = true;
conn = Reference<IConnection>();
when (wait( peer->resetPing.onTrigger())) {
// Clients might send more packets in response, which needs to go out on the next connection
IFailureMonitor::failureMonitor().notifyDisconnect( self->destination );
if (e.code() == error_code_actor_cancelled) throw;
// Try to recover, even from serious errors, by retrying
if(self->peerReferences <= 0 && self->reliable.empty() && self->unsent.empty()) {
TraceEvent("PeerDestroy").error(e).suppressFor(1.0).detail("PeerAddr", self->destination);
return Void();
// Actor that writes the peer's unsent queue to a connection.
//   self - peer whose `unsent` queue is written and whose `dataToSend` trigger wakes this actor.
//   conn - connection to write to.
// Each pass of the outer loop waits a coalescing delay, writes until `self->unsent` is empty,
// and then waits for `dataToSend` before starting the next pass.
ACTOR Future<Void> connectionWriter( Reference<Peer> self, Reference<IConnection> conn ) {
state double lastWriteTime = now();
loop {
//wait( delay(0, TaskPriority::WriteSocket) );
// The delay is whatever remains of MAX_COALESCE_DELAY since the last write, but never less than
// MIN_COALESCE_DELAY, and is jittered.
// NOTE(review): presumably this lets several small sends share one write - confirm.
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskPriority::WriteSocket) );
//wait( delay(500e-6, TaskPriority::WriteSocket) );
//wait( yield(TaskPriority::WriteSocket) );
// Send until there is nothing left to send
loop {
// Remember when this write happened; the coalescing delay above is measured from it.
lastWriteTime = now();
// Each write call is limited to MAX_PACKET_SEND_BYTES.
int sent = conn->write(self->unsent.getUnsent(), /* limit= */ FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
if (sent) {
// Add the written bytes to the transport's bytesSent counter.
self->transport->bytesSent += sent;
if (self->unsent.empty()) break;
TEST(true); // We didn't write everything, so apparently the write buffer is full. Wait for it to be nonfull.
wait( conn->onWritable() );
wait( yield(TaskPriority::WriteSocket) );
// Wait until there is something to send
while ( self->unsent.empty() )
wait( self->dataToSend.onTrigger() );
// Actor that manages the connection for one peer: it connects, runs the writer, reader and
// monitor, and retries after a failure.
//   self   - the peer being managed.
//   conn   - an already established (incoming) connection to start with; defaults to none.
//   reader - reader future to use with `conn`; defaults to an already-completed future.
// Flow visible in this block:
//   1. With no connection, mark the outgoing side idle, wait for unsent data (or a client
//      reconnect request), then connect() under CONNECTION_MONITOR_TIMEOUT.
//   2. Wait on connectionWriter, the reader and connectionMonitor together.
//   3. On error, update reconnectionDelay, log the closure, notify the failure monitor, and loop again.
//      Cancellation is rethrown. The actor returns once the peer has no references and nothing pending.
ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
Reference<IConnection> conn = Reference<IConnection>(),
Future<Void> reader = Void()) {
TraceEvent(SevDebug, "ConnectionKeeper", conn ? conn->getDebugID() : UID())
.detail("PeerAddr", self->destination)
.detail("ConnSet", (bool)conn);
// This is used only at client side and is used to override waiting for unsent data to update failure monitoring
// status. At client, if an existing connection fails, we retry making a connection and if that fails, then only
// we report that address as failed.
state bool clientReconnectDelay = false;
loop {
try {
if (!conn) { // Always, except for the first loop with an incoming connection
self->outgoingConnectionIdle = true;
// Wait until there is something to send.
while (self->unsent.empty()) {
if (FlowTransport::transport().isClient() && self->destination.isPublic() &&
clientReconnectDelay) {
ASSERT( self->destination.isPublic() );
self->outgoingConnectionIdle = false;
// The expression below is the time left until lastConnectTime + reconnectionDelay, clamped at zero.
// NOTE(review): the call that consumes it is not visible in this view - confirm.
std::max(0.0, self->lastConnectTime + self->reconnectionDelay -
now()))); // Don't connect() to the same peer more than once per 2 sec
self->lastConnectTime = now();
TraceEvent("ConnectingTo", conn ? conn->getDebugID() : UID()).suppressFor(1.0).detail("PeerAddr", self->destination);
// If connect() does not finish within CONNECTION_MONITOR_TIMEOUT, the result is a null reference.
Reference<IConnection> _conn = wait( timeout( INetworkConnections::net()->connect(self->destination), FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT, Reference<IConnection>() ) );
if (_conn) {
// Connected: a client reports the address as not failed.
if (FlowTransport::transport().isClient()) {
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false));
// With nothing queued, only the reconnect flag is cleared; otherwise the new connection is adopted.
if (self->unsent.empty()) {
clientReconnectDelay = false;
} else {
conn = _conn;
TraceEvent("ConnectionExchangingConnectPacket", conn->getDebugID())
.detail("PeerAddr", self->destination);
} else {
// Timed out: a client reports the address as failed, then the attempt fails with connection_failed.
TraceEvent("ConnectionTimedOut", conn ? conn->getDebugID() : UID()).suppressFor(1.0).detail("PeerAddr", self->destination);
if (FlowTransport::transport().isClient()) {
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(true));
throw connection_failed();
// Start a reader for the connection that was just opened.
reader = connectionReader( self->transport, conn, self, Promise<Reference<Peer>>());
} else {
self->outgoingConnectionIdle = false;
try {
// Completes (or throws) as soon as any one of the three does.
wait( connectionWriter( self, conn ) || reader || connectionMonitor(self) );
} catch (Error& e) {
// The listed error codes are rethrown to the outer handler; ASSERT( false ) follows the check.
// NOTE(review): connection_idle is not in this list although the outer handler treats it as an
// expected code - confirm this is intended.
if (e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
e.code() == error_code_connection_unreferenced ||
(g_network->isSimulated() && e.code() == error_code_checksum_failed))
throw e;
ASSERT( false );
} catch (Error& e) {
// If the last connect is older than RECONNECTION_RESET_TIME the first branch runs (its body is not
// visible here); otherwise the delay is multiplied by RECONNECTION_TIME_GROWTH_RATE and capped at
// MAX_RECONNECTION_TIME.
if(now() - self->lastConnectTime > FLOW_KNOBS->RECONNECTION_RESET_TIME) {
} else {
self->reconnectionDelay = std::min(FLOW_KNOBS->MAX_RECONNECTION_TIME, self->reconnectionDelay * FLOW_KNOBS->RECONNECTION_TIME_GROWTH_RATE);
reader = Future<Void>();
// `ok` is true for the listed error codes; it selects SevInfo instead of SevWarnAlways
// for the closure events below.
bool ok = e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
e.code() == error_code_connection_unreferenced || e.code() == error_code_connection_idle ||
(g_network->isSimulated() && e.code() == error_code_checksum_failed);
if(self->compatible) {
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID())
.error(e, true)
.detail("PeerAddr", self->destination);
else {
TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed",
conn ? conn->getDebugID() : UID())
.error(e, true)
.detail("PeerAddr", self->destination);
// For a public peer the failure monitor still considers available, record timestamps in the
// transport's closedPeers entry for this address.
// NOTE(review): the condition that selects the TooManyConnectionsClosed event is not visible here.
if(self->destination.isPublic() && IFailureMonitor::failureMonitor().getState(self->destination).isAvailable()) {
auto& it = self->transport->closedPeers[self->destination];
it.first = now();
TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID())
.detail("PeerAddr", self->destination);
it.second = now();
if (conn) {
// A client asks for a reconnect attempt on the next pass unless the connection closed for being idle.
if (FlowTransport::transport().isClient() && e.code() != error_code_connection_idle) {
clientReconnectDelay = true;
// Drop the connection reference so the next pass takes the `!conn` path.
conn = Reference<IConnection>();
// Clients might send more packets in response, which needs to go out on the next connection
IFailureMonitor::failureMonitor().notifyDisconnect( self->destination );
// Cancellation is rethrown rather than retried.
if (e.code() == error_code_actor_cancelled) throw;
// Try to recover, even from serious errors, by retrying
// The actor stops retrying only when the peer has no references, nothing queued and no outstanding replies.
if(self->peerReferences <= 0 && self->reliable.empty() && self->unsent.empty() && self->outstandingReplies==0) {
TraceEvent("PeerDestroy").error(e).suppressFor(1.0).detail("PeerAddr", self->destination);
return Void();
// Registers a freshly written packet with this peer.
//   pb          - packet buffer for the packet. NOTE(review): not referenced in the lines visible here.
//   rp          - reliable-packet record, or null for an unreliable send; non-null records are
//                 inserted into `reliable`.
//   firstUnsent - true when the caller observed the unsent queue to be empty before writing;
//                 in that case `dataToSend` is triggered, which is what connectionWriter waits on.
void Peer::send(PacketBuffer* pb, ReliablePacket* rp, bool firstUnsent) {
if (rp) reliable.insert(rp);
if (firstUnsent) dataToSend.trigger();
// Builds the ConnectPacket for a new connection and places it at the front of the unsent queue.
void Peer::prependConnectPacket() {
// Send the ConnectPacket expected at the beginning of a new connection
ConnectPacket pkt;
// Advertise the port of whichever local address has the same TLS setting as the destination:
// the primary address first, then the secondary address if one is configured.
if(transport->localAddresses.address.isTLS() == destination.isTLS()) {
pkt.canonicalRemotePort = transport->localAddresses.address.port;
} else if(transport->localAddresses.secondaryAddress.present()) {
pkt.canonicalRemotePort = transport->localAddresses.secondaryAddress.get().port;
} else {
// a "mixed" TLS/non-TLS connection is like a client/server connection - there's no way to reverse it
pkt.canonicalRemotePort = 0;
// The length field counts everything in the packet except the length field itself.
pkt.connectPacketLength = sizeof(pkt) - sizeof(pkt.connectPacketLength);
pkt.protocolVersion = currentProtocolVersion;
pkt.connectionId = transport->transportId;
// The packet goes into its own fresh buffer, which is then prepended to `unsent`.
// NOTE(review): the statement that serializes `pkt` into `wr` is not visible here - confirm.
PacketBuffer* pb_first = PacketBuffer::create();
PacketWriter wr( pb_first, nullptr, Unversioned() );
unsent.prependWriteBuffer(pb_first, wr.finish());
// Drops queued unreliable data while keeping reliable packets queued for sending.
// NOTE(review): only the reliable-packet branch is visible here; the statement that discards the
// current unsent list (described by the comment below) is not shown - confirm against the full source.
void Peer::discardUnreliablePackets() {
// Throw away the current unsent list, dropping the reference count on each PacketBuffer that accounts for presence in the unsent list
// If there are reliable packets, compact reliable packets into a new unsent range
if(!reliable.empty()) {
// Compact the reliable packets starting at the unsent queue's current write buffer.
PacketBuffer* pb = unsent.getWriteBuffer();
pb = reliable.compact(pb, nullptr);
// Decides whether a connection initiated by the remote side replaces this peer's current connection.
//   self   - reference to this peer, passed on to connectionKeeper.
//   conn   - the incoming connection.
//   reader - future passed on to connectionKeeper as its `reader` argument.
// Throws address_in_use when the peer is not public and its connection is not idle.
void Peer::onIncomingConnection( Reference<Peer> self, Reference<IConnection> conn, Future<Void> reader ) {
// In case two processes are trying to connect to each other simultaneously, the process with the larger canonical NetworkAddress
// gets to keep its outgoing connection.
if ( !destination.isPublic() && !outgoingConnectionIdle ) throw address_in_use();
// The tie-break compares against the local address whose TLS setting matches the destination's:
// the secondary address when it is present and matches, otherwise the primary address.
NetworkAddress compatibleAddr = transport->localAddresses.address;
if(transport->localAddresses.secondaryAddress.present() && transport->localAddresses.secondaryAddress.get().isTLS() == destination.isTLS()) {
compatibleAddr = transport->localAddresses.secondaryAddress.get();
// The incoming connection is kept when the peer is not public, when no outgoing connection is
// active, or when the peer's address compares greater than ours.
if ( !destination.isPublic() || outgoingConnectionIdle || destination > compatibleAddr ) {
// Keep the new connection
TraceEvent("IncomingConnection", conn->getDebugID())
.detail("FromAddr", conn->getPeerAddress())
.detail("CanonicalAddr", destination)
.detail("IsPublic", destination.isPublic());
// Overwrites `connect` with a keeper for the new connection.
// NOTE(review): presumably dropping the previous future cancels the old keeper - confirm.
connect = connectionKeeper( self, conn, reader );
} else {
TraceEvent("RedundantConnection", conn->getDebugID())
.detail("FromAddr", conn->getPeerAddress().toString())
.detail("CanonicalAddr", destination)
.detail("LocalAddr", compatibleAddr);
// Keep our prior connection
// Send an (ignored) packet to make sure that, if our outgoing connection died before the peer made this connection attempt,
// we eventually find out that our connection is dead, close it, and then respond to the next connection reattempt from peer.
TransportData::~TransportData() {
TransportData::~TransportData() {
for(auto &p : peers) {
for(auto &p : peers) {
@ -671,9 +649,12 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
} else if (destination.token.first() & TOKEN_STREAM_FLAG) {
} else if (destination.token.first() & TOKEN_STREAM_FLAG) {
// We don't have the (stream) endpoint 'token', notify the remote machine
// We don't have the (stream) endpoint 'token', notify the remote machine
if (destination.token.first() != -1) {
if (destination.token.first() != -1) {
if (self->isLocalAddress(destination.getPrimaryAddress())) {
SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)),
sendLocal(self, SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)), Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND));
Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND), false, true);
} else {
Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress());
sendPacket(self, peer, SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)), Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND), false);
@ -1013,7 +994,7 @@ Reference<Peer> TransportData::getPeer( NetworkAddress const& address, bool open
return Reference<Peer>();
return Reference<Peer>();
Reference<Peer> newPeer = Reference<Peer>( new Peer(this, address) );
Reference<Peer> newPeer = Reference<Peer>( new Peer(this, address) );
newPeer->connect = Peer::connectionKeeper(newPeer);
newPeer->connect = connectionKeeper(newPeer);
peers[address] = newPeer;
peers[address] = newPeer;
return newPeer;
return newPeer;
@ -1113,7 +1094,7 @@ void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream)
.detail("Address", endpoint.getPrimaryAddress())
.detail("Address", endpoint.getPrimaryAddress())
.detail("Token", endpoint.token);
.detail("Token", endpoint.token);
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty()) {
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) {
@ -1143,137 +1124,143 @@ void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageRece
ASSERT( endpoint.token == otoken );
ASSERT( endpoint.token == otoken );
static PacketID sendPacket( TransportData* self, ISerializeSource const& what, const Endpoint& destination, bool reliable, bool openConnection ) {
static void sendLocal( TransportData* self, ISerializeSource const& what, const Endpoint& destination ) {
if (self->isLocalAddress(destination.getPrimaryAddress())) {
TEST(true); // "Loopback" delivery
TEST(true); // "Loopback" delivery
// SOMEDAY: Would it be better to avoid (de)serialization by doing this check in flow?
// SOMEDAY: Would it be better to avoid (de)serialization by doing this check in flow?
Standalone<StringRef> copy;
Standalone<StringRef> copy;
ObjectWriter wr(AssumeVersion(currentProtocolVersion));
ObjectWriter wr(AssumeVersion(currentProtocolVersion));
copy = wr.toStringRef();
copy = wr.toStringRef();
} else {
} else {
BinaryWriter wr( AssumeVersion(currentProtocolVersion) );
BinaryWriter wr( AssumeVersion(currentProtocolVersion) );
copy = wr.toValue();
copy = wr.toValue();
VALGRIND_CHECK_MEM_IS_DEFINED(copy.begin(), copy.size());
VALGRIND_CHECK_MEM_IS_DEFINED(copy.begin(), copy.size());
ASSERT(copy.size() > 0);
ASSERT(copy.size() > 0);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(currentProtocolVersion)), false);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(currentProtocolVersion)), false);
return (PacketID)nullptr;
static ReliablePacket* sendPacket( TransportData* self, Reference<Peer> peer, ISerializeSource const& what, const Endpoint& destination, bool reliable ) {
} else {
const bool checksumEnabled = !destination.getPrimaryAddress().isTLS();
const bool checksumEnabled = !destination.getPrimaryAddress().isTLS();
Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress(), openConnection);
// If there isn't an open connection, a public address, or the peer isn't compatible, we can't send
if (!peer || (peer->outgoingConnectionIdle && !destination.getPrimaryAddress().isPublic()) || (peer->incompatibleProtocolVersionNewer && destination.token != WLTOKEN_PING_PACKET)) {
TEST(true); // Can't send to private address without a compatible open connection
return nullptr;
// If there isn't an open connection, a public address, or the peer isn't compatible, we can't send
bool firstUnsent = peer->unsent.empty();
if (!peer || (peer->outgoingConnectionIdle && !destination.getPrimaryAddress().isPublic()) || (peer->incompatibleProtocolVersionNewer && destination.token != WLTOKEN_PING_PACKET)) {
TEST(true); // Can't send to private address without a compatible open connection
PacketBuffer* pb = peer->unsent.getWriteBuffer();
return (PacketID)nullptr;
ReliablePacket* rp = reliable ? new ReliablePacket : 0;
int prevBytesWritten = pb->bytes_written;
PacketBuffer* checksumPb = pb;
PacketWriter wr(pb,rp,AssumeVersion(currentProtocolVersion)); // SOMEDAY: Can we downgrade to talk to older peers?
// Reserve some space for packet length and checksum, write them after serializing data
SplitBuffer packetInfoBuffer;
uint32_t len, checksum = 0;
int packetInfoSize = sizeof(len);
if (checksumEnabled) {
packetInfoSize += sizeof(checksum);
wr.writeAhead(packetInfoSize , &packetInfoBuffer);
wr << destination.token;
what.serializePacketWriter(wr, FLOW_KNOBS->USE_OBJECT_SERIALIZER);
pb = wr.finish();
len = wr.size() - packetInfoSize;
if (checksumEnabled) {
// Find the correct place to start calculating checksum
uint32_t checksumUnprocessedLength = len;
prevBytesWritten += packetInfoSize;
if (prevBytesWritten >= checksumPb->bytes_written) {
prevBytesWritten -= checksumPb->bytes_written;
checksumPb = checksumPb->nextPacketBuffer();
bool firstUnsent = peer->unsent.empty();
// Checksum calculation
while (checksumUnprocessedLength > 0) {
PacketBuffer* pb = peer->unsent.getWriteBuffer();
uint32_t processLength =
ReliablePacket* rp = reliable ? new ReliablePacket : 0;
std::min(checksumUnprocessedLength, (uint32_t)(checksumPb->bytes_written - prevBytesWritten));
checksum = crc32c_append(checksum, checksumPb->data() + prevBytesWritten, processLength);
int prevBytesWritten = pb->bytes_written;
checksumUnprocessedLength -= processLength;
PacketBuffer* checksumPb = pb;
checksumPb = checksumPb->nextPacketBuffer();
prevBytesWritten = 0;
PacketWriter wr(pb,rp,AssumeVersion(currentProtocolVersion)); // SOMEDAY: Can we downgrade to talk to older peers?
// Reserve some space for packet length and checksum, write them after serializing data
SplitBuffer packetInfoBuffer;
uint32_t len, checksum = 0;
int packetInfoSize = sizeof(len);
if (checksumEnabled) {
packetInfoSize += sizeof(checksum);
wr.writeAhead(packetInfoSize , &packetInfoBuffer);
// Write packet length and checksum into packet buffer
wr << destination.token;
packetInfoBuffer.write(&len, sizeof(len));
what.serializePacketWriter(wr, FLOW_KNOBS->USE_OBJECT_SERIALIZER);
if (checksumEnabled) {
pb = wr.finish();
packetInfoBuffer.write(&checksum, sizeof(checksum), sizeof(len));
len = wr.size() - packetInfoSize;
if (checksumEnabled) {
// Find the correct place to start calculating checksum
TraceEvent(SevError, "Net2_PacketLimitExceeded").detail("ToPeer", destination.getPrimaryAddress()).detail("Length", (int)len);
uint32_t checksumUnprocessedLength = len;
// throw platform_error(); // FIXME: How to recover from this situation?
prevBytesWritten += packetInfoSize;
if (prevBytesWritten >= checksumPb->bytes_written) {
else if (len > FLOW_KNOBS->PACKET_WARNING) {
prevBytesWritten -= checksumPb->bytes_written;
TraceEvent(self->warnAlwaysForLargePacket ? SevWarnAlways : SevWarn, "Net2_LargePacket")
checksumPb = checksumPb->nextPacketBuffer();
.detail("ToPeer", destination.getPrimaryAddress())
.detail("Length", (int)len)
.detail("Token", destination.token)
// Checksum calculation
while (checksumUnprocessedLength > 0) {
self->warnAlwaysForLargePacket = false;
uint32_t processLength =
std::min(checksumUnprocessedLength, (uint32_t)(checksumPb->bytes_written - prevBytesWritten));
checksum = crc32c_append(checksum, checksumPb->data() + prevBytesWritten, processLength);
checksumUnprocessedLength -= processLength;
checksumPb = checksumPb->nextPacketBuffer();
prevBytesWritten = 0;
// Write packet length and checksum into packet buffer
packetInfoBuffer.write(&len, sizeof(len));
if (checksumEnabled) {
packetInfoBuffer.write(&checksum, sizeof(checksum), sizeof(len));
TraceEvent(SevError, "Net2_PacketLimitExceeded").detail("ToPeer", destination.getPrimaryAddress()).detail("Length", (int)len);
// throw platform_error(); // FIXME: How to recover from this situation?
else if (len > FLOW_KNOBS->PACKET_WARNING) {
TraceEvent(self->warnAlwaysForLargePacket ? SevWarnAlways : SevWarn, "Net2_LargePacket")
.detail("ToPeer", destination.getPrimaryAddress())
.detail("Length", (int)len)
.detail("Token", destination.token)
self->warnAlwaysForLargePacket = false;
SendBuffer *checkbuf = pb;
SendBuffer *checkbuf = pb;
while (checkbuf) {
while (checkbuf) {
int size = checkbuf->bytes_written;
int size = checkbuf->bytes_written;
const uint8_t* data = checkbuf->data;
const uint8_t* data = checkbuf->data;
checkbuf = checkbuf -> next;
checkbuf = checkbuf -> next;
peer->send(pb, rp, firstUnsent);
peer->send(pb, rp, firstUnsent);
if (destination.token != WLTOKEN_PING_PACKET) {
if (destination.token != WLTOKEN_PING_PACKET) {
peer->lastDataPacketSentTime = now();
peer->lastDataPacketSentTime = now();
return (PacketID)rp;
return rp;
PacketID FlowTransport::sendReliable( ISerializeSource const& what, const Endpoint& destination ) {
ReliablePacket* FlowTransport::sendReliable( ISerializeSource const& what, const Endpoint& destination ) {
return sendPacket( self, what, destination, true, true );
if (self->isLocalAddress(destination.getPrimaryAddress())) {
sendLocal( self, what, destination );
return nullptr;
Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress());
return sendPacket( self, peer, what, destination, true );
void FlowTransport::cancelReliable( PacketID pid ) {
void FlowTransport::cancelReliable( ReliablePacket* p ) {
ReliablePacket* p = (ReliablePacket*)pid;
if (p) p->remove();
if (p) p->remove();
// SOMEDAY: Call reliable.compact() if a lot of memory is wasted in PacketBuffers by formerly reliable packets mixed with a few reliable ones. Don't forget to delref the new PacketBuffers since they are unsent.
// SOMEDAY: Call reliable.compact() if a lot of memory is wasted in PacketBuffers by formerly reliable packets mixed with a few reliable ones. Don't forget to delref the new PacketBuffers since they are unsent.
void FlowTransport::sendUnreliable( ISerializeSource const& what, const Endpoint& destination, bool openConnection ) {
Reference<Peer> FlowTransport::sendUnreliable( ISerializeSource const& what, const Endpoint& destination, bool openConnection ) {
sendPacket( self, what, destination, false, openConnection );
if (self->isLocalAddress(destination.getPrimaryAddress())) {
sendLocal( self, what, destination );
return Reference<Peer>();
Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress(), openConnection);
sendPacket( self, peer, what, destination, false );
return peer;
int FlowTransport::getEndpointCount() {
int FlowTransport::getEndpointCount() {