From 90e3dcf1aea17eecf472f341363304bee252a7c0 Mon Sep 17 00:00:00 2001 From: ZHANG Dapeng Date: Thu, 3 Dec 2020 14:07:24 -0800 Subject: [PATCH] xds: provide more debug info for priority lb error-picker When all priorities fail, update the overall picker with the picker from the last priority. This still does not provide the full detail why all priorities fail but is better than nothing being provided currently. --- .../io/grpc/xds/PriorityLoadBalancer.java | 6 ++- .../io/grpc/xds/PriorityLoadBalancerTest.java | 43 +++++++++++++------ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/xds/src/main/java/io/grpc/xds/PriorityLoadBalancer.java b/xds/src/main/java/io/grpc/xds/PriorityLoadBalancer.java index d5876fd743..cace127b35 100644 --- a/xds/src/main/java/io/grpc/xds/PriorityLoadBalancer.java +++ b/xds/src/main/java/io/grpc/xds/PriorityLoadBalancer.java @@ -153,7 +153,9 @@ final class PriorityLoadBalancer extends LoadBalancer { } // TODO(zdapeng): Include error details of each priority. logger.log(XdsLogLevel.DEBUG, "All priority failed"); - updateOverallState(TRANSIENT_FAILURE, new ErrorPicker(Status.UNAVAILABLE)); + String lastPriority = priorityNames.get(priorityNames.size() - 1); + SubchannelPicker errorPicker = children.get(lastPriority).picker; + updateOverallState(TRANSIENT_FAILURE, errorPicker); } private void updateOverallState(ConnectivityState state, SubchannelPicker picker) { @@ -190,6 +192,8 @@ final class PriorityLoadBalancer extends LoadBalancer { // The child is deactivated. return; } + picker = new ErrorPicker( + Status.UNAVAILABLE.withDescription("Connection timeout for priority " + priority)); logger.log(XdsLogLevel.DEBUG, "Priority {0} failed over to next", priority); tryNextPriority(true); } diff --git a/xds/src/test/java/io/grpc/xds/PriorityLoadBalancerTest.java b/xds/src/test/java/io/grpc/xds/PriorityLoadBalancerTest.java index 390b35a8a2..fb522193ae 100644 --- a/xds/src/test/java/io/grpc/xds/PriorityLoadBalancerTest.java +++ b/xds/src/test/java/io/grpc/xds/PriorityLoadBalancerTest.java @@ -281,7 +281,7 @@ public class PriorityLoadBalancerTest { return PickResult.withSubchannel(subchannel0); } }); - assertLatestSubchannelPicker(subchannel0); + assertCurrentPickerPicksSubchannel(subchannel0); // p0 fails over to p1 immediately. helper0.updateBalancingState(TRANSIENT_FAILURE, new ErrorPicker(Status.ABORTED)); @@ -308,7 +308,7 @@ public class PriorityLoadBalancerTest { return PickResult.withSubchannel(subchannel1); } }); - assertLatestSubchannelPicker(subchannel1); + assertCurrentPickerPicksSubchannel(subchannel1); // p0 gets back to READY final Subchannel subchannel2 = mock(Subchannel.class); @@ -320,11 +320,11 @@ public class PriorityLoadBalancerTest { return PickResult.withSubchannel(subchannel2); } }); - assertLatestSubchannelPicker(subchannel2); + assertCurrentPickerPicksSubchannel(subchannel2); // p2 fails but does not affect overall picker helper2.updateBalancingState(TRANSIENT_FAILURE, new ErrorPicker(Status.UNAVAILABLE)); - assertLatestSubchannelPicker(subchannel2); + assertCurrentPickerPicksSubchannel(subchannel2); // p0 fails over to p3 immediately since p1 already timeout and p2 already in TRANSIENT_FAILURE. helper0.updateBalancingState(TRANSIENT_FAILURE, new ErrorPicker(Status.UNAVAILABLE)); @@ -334,9 +334,14 @@ public class PriorityLoadBalancerTest { LoadBalancer balancer3 = Iterables.getLast(fooBalancers); Helper helper3 = Iterables.getLast(fooHelpers); - // p3 fails then the channel should go to TRANSIENT_FAILURE - helper3.updateBalancingState(TRANSIENT_FAILURE, new ErrorPicker(Status.UNAVAILABLE)); - assertLatestConnectivityState(TRANSIENT_FAILURE); + // p3 timeout then the channel should go to TRANSIENT_FAILURE + fakeClock.forwardTime(10, TimeUnit.SECONDS); + assertCurrentPickerReturnsError(Status.Code.UNAVAILABLE, "timeout"); + + // p3 fails then the picker should have error status updated + helper3.updateBalancingState( + TRANSIENT_FAILURE, new ErrorPicker(Status.DATA_LOSS.withDescription("foo"))); + assertCurrentPickerReturnsError(Status.Code.DATA_LOSS, "foo"); // p2 gets back to READY final Subchannel subchannel3 = mock(Subchannel.class); @@ -348,7 +353,7 @@ public class PriorityLoadBalancerTest { return PickResult.withSubchannel(subchannel3); } }); - assertLatestSubchannelPicker(subchannel3); + assertCurrentPickerPicksSubchannel(subchannel3); // p0 gets back to READY final Subchannel subchannel4 = mock(Subchannel.class); @@ -360,11 +365,11 @@ public class PriorityLoadBalancerTest { return PickResult.withSubchannel(subchannel4); } }); - assertLatestSubchannelPicker(subchannel4); + assertCurrentPickerPicksSubchannel(subchannel4); // p0 fails over to p2 and picker is updated to p2's existing picker. helper0.updateBalancingState(TRANSIENT_FAILURE, new ErrorPicker(Status.UNAVAILABLE)); - assertLatestSubchannelPicker(subchannel3); + assertCurrentPickerPicksSubchannel(subchannel3); // Deactivate child balancer get deleted. fakeClock.forwardTime(15, TimeUnit.MINUTES); @@ -380,10 +385,20 @@ public class PriorityLoadBalancerTest { assertThat(connectivityStateCaptor.getValue()).isEqualTo(expectedState); } - private void assertLatestSubchannelPicker(Subchannel expectedSubchannelToPick) { + private void assertCurrentPickerReturnsError( + Status.Code expectedCode, String expectedDescription) { + assertLatestConnectivityState(TRANSIENT_FAILURE); + Status error = + pickerCaptor.getValue().pickSubchannel(mock(PickSubchannelArgs.class)).getStatus(); + assertThat(error.getCode()).isEqualTo(expectedCode); + if (expectedDescription != null) { + assertThat(error.getDescription()).contains(expectedDescription); + } + } + + private void assertCurrentPickerPicksSubchannel(Subchannel expectedSubchannelToPick) { assertLatestConnectivityState(READY); - assertThat( - pickerCaptor.getValue().pickSubchannel(mock(PickSubchannelArgs.class)).getSubchannel()) - .isEqualTo(expectedSubchannelToPick); + PickResult pickResult = pickerCaptor.getValue().pickSubchannel(mock(PickSubchannelArgs.class)); + assertThat(pickResult.getSubchannel()).isEqualTo(expectedSubchannelToPick); } }