From 10cb4a3bed4631d4991a2dcbb263c53a6dc607c8 Mon Sep 17 00:00:00 2001 From: Terry Wilson Date: Fri, 22 Mar 2024 14:40:06 -0700 Subject: [PATCH] util: Status desc for outlier detection ejection (#11036) Including a Status description makes it easier to debug subchannel closure issues if it's clear that a subchannel became unavailable because of an outlier detection ejection. --- .../util/OutlierDetectionLoadBalancer.java | 5 ++- .../OutlierDetectionLoadBalancerTest.java | 39 +++++++++++++++---- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java b/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java index c24e238646..033d01ce51 100644 --- a/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java +++ b/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java @@ -377,8 +377,9 @@ public final class OutlierDetectionLoadBalancer extends LoadBalancer { void eject() { ejected = true; - subchannelStateListener.onSubchannelState( - ConnectivityStateInfo.forTransientFailure(Status.UNAVAILABLE)); + subchannelStateListener.onSubchannelState(ConnectivityStateInfo.forTransientFailure( + Status.UNAVAILABLE.withDescription( + "The subchannel has been ejected by outlier detection"))); logger.log(ChannelLogLevel.INFO, "Subchannel ejected: {0}", this); } diff --git a/util/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java b/util/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java index f8b700dca2..cc1fc3e699 100644 --- a/util/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java +++ b/util/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java @@ -19,8 +19,8 @@ package io.grpc.util; import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; import static io.grpc.ConnectivityState.READY; +import static io.grpc.ConnectivityState.TRANSIENT_FAILURE; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -50,6 +50,7 @@ import io.grpc.LoadBalancer.SubchannelStateListener; import io.grpc.LoadBalancerProvider; import io.grpc.Metadata; import io.grpc.Status; +import io.grpc.Status.Code; import io.grpc.SynchronizationContext; import io.grpc.internal.FakeClock; import io.grpc.internal.FakeClock.ScheduledTask; @@ -1203,9 +1204,21 @@ public class OutlierDetectionLoadBalancerTest { // The one subchannel that was returning errors should be ejected. assertEjectedSubchannels(ImmutableSet.of(ImmutableSet.copyOf(servers.get(0).getAddresses()))); if (hasHealthConsumer) { - verify(healthListeners.get(servers.get(0))).onSubchannelState(eq( - ConnectivityStateInfo.forTransientFailure(Status.UNAVAILABLE) - )); + ArgumentCaptor csiCaptor = ArgumentCaptor.forClass( + ConnectivityStateInfo.class); + verify(healthListeners.get(servers.get(0)), times(2)).onSubchannelState(csiCaptor.capture()); + List connectivityStateInfos = csiCaptor.getAllValues(); + + // The subchannel went through two state transitions... + assertThat(connectivityStateInfos).hasSize(2); + // ...it first went to the READY state... + assertThat(connectivityStateInfos.get(0).getState()).isEqualTo(READY); + + // ...and then to TRANSIENT_FAILURE as outlier detection ejected it. + assertThat(connectivityStateInfos.get(1).getState()).isEqualTo(TRANSIENT_FAILURE); + assertThat(connectivityStateInfos.get(1).getStatus().getCode()).isEqualTo(Code.UNAVAILABLE); + assertThat(connectivityStateInfos.get(1).getStatus().getDescription()).isEqualTo( + "The subchannel has been ejected by outlier detection"); } } @@ -1264,9 +1277,21 @@ public class OutlierDetectionLoadBalancerTest { // The one subchannel that was returning errors should be ejected. assertEjectedSubchannels(ImmutableSet.of(ImmutableSet.copyOf(servers.get(0).getAddresses()))); if (hasHealthConsumer) { - verify(healthListeners.get(servers.get(0))).onSubchannelState(eq( - ConnectivityStateInfo.forTransientFailure(Status.UNAVAILABLE) - )); + ArgumentCaptor csiCaptor = ArgumentCaptor.forClass( + ConnectivityStateInfo.class); + verify(healthListeners.get(servers.get(0)), times(2)).onSubchannelState(csiCaptor.capture()); + List connectivityStateInfos = csiCaptor.getAllValues(); + + // The subchannel went through two state transitions... + assertThat(connectivityStateInfos).hasSize(2); + // ...it first went to the READY state... + assertThat(connectivityStateInfos.get(0).getState()).isEqualTo(READY); + + // ...and then to TRANSIENT_FAILURE as outlier detection ejected it. + assertThat(connectivityStateInfos.get(1).getState()).isEqualTo(TRANSIENT_FAILURE); + assertThat(connectivityStateInfos.get(1).getStatus().getCode()).isEqualTo(Code.UNAVAILABLE); + assertThat(connectivityStateInfos.get(1).getStatus().getDescription()).isEqualTo( + "The subchannel has been ejected by outlier detection"); } }