mirror of https://github.com/grpc/grpc-java.git
grpclb: should not ignore subchannels in the CONNECTING state when aggregating the overall LB state (#7959)
We should treat both IDLE and CONNECTING subchannels as "connection in progress" when aggregating the overall load balancing state. Otherwise, RPCs could fail prematurely if one subchannel enters TRANSIENT_FAILURE (TF) while all the others are still CONNECTING.
Commit 23d279660c made each individual subchannel stay in TF until READY if it was previously in TF. So subchannels in the CONNECTING state are those connecting for the first time. We should give them time to connect.
This commit is contained in:
parent
afe883119d
commit
9c562c8a6f
|
@ -776,7 +776,7 @@ final class GrpclbState {
|
|||
case ROUND_ROBIN:
|
||||
pickList = new ArrayList<>(backendList.size());
|
||||
Status error = null;
|
||||
boolean hasIdle = false;
|
||||
boolean hasPending = false;
|
||||
for (BackendEntry entry : backendList) {
|
||||
Subchannel subchannel = entry.subchannel;
|
||||
Attributes attrs = subchannel.getAttributes();
|
||||
|
@ -785,12 +785,12 @@ final class GrpclbState {
|
|||
pickList.add(entry);
|
||||
} else if (stateInfo.getState() == TRANSIENT_FAILURE) {
|
||||
error = stateInfo.getStatus();
|
||||
} else if (stateInfo.getState() == IDLE) {
|
||||
hasIdle = true;
|
||||
} else {
|
||||
hasPending = true;
|
||||
}
|
||||
}
|
||||
if (pickList.isEmpty()) {
|
||||
if (error != null && !hasIdle) {
|
||||
if (error != null && !hasPending) {
|
||||
pickList.add(new ErrorEntry(error));
|
||||
state = TRANSIENT_FAILURE;
|
||||
} else {
|
||||
|
|
|
@ -1175,9 +1175,10 @@ public class GrpclbLoadBalancerTest {
|
|||
deliverSubchannelState(subchannel2, ConnectivityStateInfo.forNonError(CONNECTING));
|
||||
inOrder.verify(helper).updateBalancingState(eq(CONNECTING), any(SubchannelPicker.class));
|
||||
|
||||
// Switch subchannel1 to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too.
|
||||
Status error = Status.UNAVAILABLE.withDescription("error1");
|
||||
// Switch all subchannels to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too.
|
||||
Status error = Status.UNAVAILABLE.withDescription("error");
|
||||
deliverSubchannelState(subchannel1, ConnectivityStateInfo.forTransientFailure(error));
|
||||
deliverSubchannelState(subchannel2, ConnectivityStateInfo.forTransientFailure(error));
|
||||
inOrder.verify(helper).updateBalancingState(eq(TRANSIENT_FAILURE), pickerCaptor.capture());
|
||||
assertThat(((RoundRobinPicker) pickerCaptor.getValue()).pickList)
|
||||
.containsExactly(new ErrorEntry(error));
|
||||
|
|
Loading…
Reference in New Issue