Skip to content
This repository has been archived by the owner on Dec 23, 2023. It is now read-only.

Commit

Permalink
implement gRPC client retry stats measures and views (#2084)
Browse files Browse the repository at this point in the history
* implement gRPC client retry stats measures and views
based on https://github.com/grpc/proposal/blob/master/A45-retry-stats.md#metrics-to-expose

* fix aggregation arguments for new RpcViewConstants

* attempt to fix checkstyle

* add more sensible bucket boundaries for retry per call histograms

* remove extraneous buckets and make >= 5 the upper bound

* apply fix pointed out by @asafdav2
  • Loading branch information
mackenziestarr authored Jan 24, 2022
1 parent 81225af commit 2e90f49
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,37 @@ public final class RpcMeasureConstants {
Measure.MeasureLong.create(
"grpc.io/client/started_rpcs", "Number of started client RPCs.", COUNT);

/**
* {@link Measure} for total number of retry or hedging attempts excluding transparent retries
* made during the client call.
*
* <p>Created with the measure name {@code grpc.io/client/retries_per_call} and the {@code COUNT}
* unit constant.
*
* @since 0.31.0
*/
public static final MeasureLong GRPC_CLIENT_RETRIES_PER_CALL =
Measure.MeasureLong.create(
"grpc.io/client/retries_per_call", "Number of retries per call.", COUNT);

/**
* {@link Measure} for total number of transparent retries made during the client call.
*
* <p>Created with the measure name {@code grpc.io/client/transparent_retries_per_call} and the
* {@code COUNT} unit constant.
*
* @since 0.31.0
*/
public static final MeasureLong GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL =
Measure.MeasureLong.create(
"grpc.io/client/transparent_retries_per_call",
"Number of transparent retries per call.",
COUNT);

/**
* {@link Measure} for total time of delay while there is no active attempt during the client
* call.
*
* <p>Created as a {@code MeasureLong} with the measure name
* {@code grpc.io/client/retry_delay_per_call} and the {@code MILLISECOND} unit constant.
*
* @since 0.31.0
*/
public static final MeasureLong GRPC_CLIENT_RETRY_DELAY_PER_CALL =
Measure.MeasureLong.create(
"grpc.io/client/retry_delay_per_call", "Retry delay per call.", MILLISECOND);

/**
* {@link Measure} for gRPC client error counts.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_RECEIVED_BYTES_PER_RPC;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_RECEIVED_MESSAGES_PER_METHOD;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_RECEIVED_MESSAGES_PER_RPC;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_RETRIES_PER_CALL;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_RETRY_DELAY_PER_CALL;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_ROUNDTRIP_LATENCY;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_SENT_BYTES_PER_METHOD;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_SENT_BYTES_PER_RPC;
Expand All @@ -29,6 +31,7 @@
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_SERVER_LATENCY;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_STARTED_RPCS;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_STATUS;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_SERVER_METHOD;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_SERVER_RECEIVED_BYTES_PER_METHOD;
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_SERVER_RECEIVED_BYTES_PER_RPC;
Expand Down Expand Up @@ -131,6 +134,10 @@ public final class RpcViewConstants {
0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0, 2048.0,
4096.0, 8192.0, 16384.0, 32768.0, 65536.0));

// Bucket boundaries for the per-call retry count histograms: one boundary per count from 1 to 5,
// wrapped in an unmodifiable list.
// NOTE(review): counts of 5 or more presumably share the final bucket - confirm against the
// BucketBoundaries semantics.
@VisibleForTesting
static final List<Double> RETRY_COUNT_PER_CALL_BUCKET_BOUNDARIES =
Collections.unmodifiableList(Arrays.asList(1.0, 2.0, 3.0, 4.0, 5.0));

// Use Aggregation.Mean to record sum and count stats at the same time.
@VisibleForTesting static final Aggregation MEAN = Aggregation.Mean.create();
@VisibleForTesting static final Aggregation COUNT = Count.create();
Expand All @@ -151,6 +158,10 @@ public final class RpcViewConstants {
static final Aggregation AGGREGATION_WITH_COUNT_HISTOGRAM =
Distribution.create(BucketBoundaries.create(RPC_COUNT_BUCKET_BOUNDARIES));

// Distribution aggregation built from RETRY_COUNT_PER_CALL_BUCKET_BOUNDARIES; used by the
// per-call retry views (retries per call and transparent retries per call).
@VisibleForTesting
static final Aggregation AGGREGATION_WITH_COUNT_RETRY_HISTOGRAM =
Distribution.create(BucketBoundaries.create(RETRY_COUNT_PER_CALL_BUCKET_BOUNDARIES));

@VisibleForTesting static final Duration MINUTE = Duration.create(60, 0);
@VisibleForTesting static final Duration HOUR = Duration.create(60 * 60, 0);

Expand Down Expand Up @@ -502,6 +513,71 @@ public final class RpcViewConstants {
COUNT,
Arrays.asList(GRPC_CLIENT_METHOD));

/**
* {@link View} for client retries per call.
*
* <p>Aggregates {@code GRPC_CLIENT_RETRIES_PER_CALL} with
* {@code AGGREGATION_WITH_COUNT_RETRY_HISTOGRAM}, using {@code GRPC_CLIENT_METHOD} as the only
* column.
*
* @since 0.31.0
*/
public static final View GRPC_CLIENT_RETRIES_PER_CALL_VIEW =
View.create(
View.Name.create("grpc.io/client/retries_per_call"),
"Number of client retries per call",
GRPC_CLIENT_RETRIES_PER_CALL,
AGGREGATION_WITH_COUNT_RETRY_HISTOGRAM,
Arrays.asList(GRPC_CLIENT_METHOD));

/**
* {@link View} for total transparent client retries across calls.
*
* <p>Aggregates {@code GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL} with {@code SUM}, using
* {@code GRPC_CLIENT_METHOD} as the only column.
*
* @since 0.31.0
*/
public static final View GRPC_CLIENT_TRANSPARENT_RETRIES_VIEW =
View.create(
View.Name.create("grpc.io/client/transparent_retries"),
"Total number of transparent client retries across calls",
GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL,
SUM,
Arrays.asList(GRPC_CLIENT_METHOD));

/**
* {@link View} for total time of delay while there is no active attempt during the client call.
*
* <p>Aggregates {@code GRPC_CLIENT_RETRY_DELAY_PER_CALL} with
* {@code AGGREGATION_WITH_MILLIS_HISTOGRAM}, using {@code GRPC_CLIENT_METHOD} as the only column.
*
* @since 0.31.0
*/
public static final View GRPC_CLIENT_RETRY_DELAY_PER_CALL_VIEW =
View.create(
View.Name.create("grpc.io/client/retry_delay_per_call"),
"Total time of delay while there is no active attempt during the client call",
GRPC_CLIENT_RETRY_DELAY_PER_CALL,
AGGREGATION_WITH_MILLIS_HISTOGRAM,
Arrays.asList(GRPC_CLIENT_METHOD));

/**
* {@link View} for total retries across all calls, excluding transparent retries.
*
* <p>Aggregates {@code GRPC_CLIENT_RETRIES_PER_CALL} with {@code SUM}, using
* {@code GRPC_CLIENT_METHOD} as the only column.
*
* @since 0.31.0
*/
public static final View GRPC_CLIENT_RETRIES_VIEW =
View.create(
View.Name.create("grpc.io/client/retries"),
"Total number of client retries across all calls",
GRPC_CLIENT_RETRIES_PER_CALL,
SUM,
Arrays.asList(GRPC_CLIENT_METHOD));

/**
* {@link View} for transparent retries per call.
*
* <p>Aggregates {@code GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL} with
* {@code AGGREGATION_WITH_COUNT_RETRY_HISTOGRAM}, using {@code GRPC_CLIENT_METHOD} as the only
* column.
*
* @since 0.31.0
*/
public static final View GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL_VIEW =
View.create(
View.Name.create("grpc.io/client/transparent_retries_per_call"),
"Number of transparent client retries per call",
GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL,
AGGREGATION_WITH_COUNT_RETRY_HISTOGRAM,
Arrays.asList(GRPC_CLIENT_METHOD));

// Rpc server cumulative views.

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,15 @@ public final class RpcViews {
RpcViewConstants.GRPC_CLIENT_ROUNDTRIP_LATENCY_VIEW,
RpcViewConstants.GRPC_CLIENT_STARTED_RPC_VIEW);

// The five client retry views that registerClientRetryGrpcViews registers: per-call and total
// retries, per-call and total transparent retries, and retry delay per call.
@VisibleForTesting
static final ImmutableSet<View> GRPC_CLIENT_RETRY_VIEWS_SET =
ImmutableSet.of(
RpcViewConstants.GRPC_CLIENT_RETRIES_PER_CALL_VIEW,
RpcViewConstants.GRPC_CLIENT_RETRIES_VIEW,
RpcViewConstants.GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL_VIEW,
RpcViewConstants.GRPC_CLIENT_TRANSPARENT_RETRIES_VIEW,
RpcViewConstants.GRPC_CLIENT_RETRY_DELAY_PER_CALL_VIEW);

@VisibleForTesting
static final ImmutableSet<View> GRPC_SERVER_BASIC_VIEWS_SET =
ImmutableSet.of(
Expand Down Expand Up @@ -188,6 +197,24 @@ static void registerClientGrpcViews(ViewManager viewManager) {
}
}

/**
 * Registers every view in {@code GRPC_CLIENT_RETRY_VIEWS_SET} with the view manager returned by
 * {@code Stats.getViewManager()}.
 *
 * <p>It is recommended to call this method before doing any RPC call to avoid missing stats.
 *
 * @since 0.31.0
 */
public static void registerClientRetryGrpcViews() {
  ViewManager defaultViewManager = Stats.getViewManager();
  registerClientRetryGrpcViews(defaultViewManager);
}

@VisibleForTesting
static void registerClientRetryGrpcViews(ViewManager viewManager) {
  // Overload that takes the manager explicitly so tests can pass in their own ViewManager.
  for (View retryView : GRPC_CLIENT_RETRY_VIEWS_SET) {
    viewManager.registerView(retryView);
  }
}

/**
* Registers all standard server gRPC views.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ public void testConstants() {
assertThat(RpcMeasureConstants.GRPC_CLIENT_SERVER_LATENCY).isNotNull();
assertThat(RpcMeasureConstants.GRPC_CLIENT_ROUNDTRIP_LATENCY).isNotNull();
assertThat(RpcMeasureConstants.GRPC_CLIENT_STARTED_RPCS).isNotNull();
assertThat(RpcMeasureConstants.GRPC_CLIENT_RETRIES_PER_CALL).isNotNull();
assertThat(RpcMeasureConstants.GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL).isNotNull();
assertThat(RpcMeasureConstants.GRPC_CLIENT_RETRY_DELAY_PER_CALL).isNotNull();

// Test server measurement descriptors.
assertThat(RpcMeasureConstants.RPC_SERVER_ERROR_COUNT).isNotNull();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ public void testConstants() {
0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0, 2048.0, 4096.0,
8192.0, 16384.0, 32768.0, 65536.0)
.inOrder();
assertThat(RpcViewConstants.RETRY_COUNT_PER_CALL_BUCKET_BOUNDARIES)
.containsExactly(1.0, 2.0, 3.0, 4.0, 5.0)
.inOrder();

// Test Aggregations
assertThat(RpcViewConstants.MEAN).isEqualTo(Mean.create());
Expand All @@ -82,7 +85,10 @@ public void testConstants() {
.isEqualTo(
Distribution.create(
BucketBoundaries.create(RpcViewConstants.RPC_COUNT_BUCKET_BOUNDARIES)));

assertThat(RpcViewConstants.AGGREGATION_WITH_COUNT_RETRY_HISTOGRAM)
.isEqualTo(
Distribution.create(
BucketBoundaries.create(RpcViewConstants.RETRY_COUNT_PER_CALL_BUCKET_BOUNDARIES)));
// Test Duration and Window
assertThat(RpcViewConstants.MINUTE).isEqualTo(Duration.create(60, 0));
assertThat(RpcViewConstants.HOUR).isEqualTo(Duration.create(60 * 60, 0));
Expand Down Expand Up @@ -112,7 +118,11 @@ public void testConstants() {
assertThat(RpcViewConstants.GRPC_CLIENT_SENT_MESSAGES_PER_METHOD_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_RECEIVED_MESSAGES_PER_METHOD_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_SERVER_LATENCY_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_STARTED_RPC_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_RETRIES_PER_CALL_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_RETRIES_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_TRANSPARENT_RETRIES_PER_CALL_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_TRANSPARENT_RETRIES_VIEW).isNotNull();
assertThat(RpcViewConstants.GRPC_CLIENT_RETRY_DELAY_PER_CALL_VIEW).isNotNull();

// Test server distribution view descriptors.
assertThat(RpcViewConstants.RPC_SERVER_ERROR_COUNT_VIEW).isNotNull();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ public void registerClientGrpcViews() {
.containsExactlyElementsIn(RpcViews.GRPC_CLIENT_VIEWS_SET);
}

@Test
public void registerClientRetryGrpcViews() {
  // Register against a fake manager so the set of registered views can be inspected afterwards.
  FakeViewManager recordingManager = new FakeViewManager();
  RpcViews.registerClientRetryGrpcViews(recordingManager);
  assertThat(recordingManager.getRegisteredViews())
      .containsExactlyElementsIn(RpcViews.GRPC_CLIENT_RETRY_VIEWS_SET);
}

@Test
public void registerServerGrpcViews() {
FakeViewManager fakeViewManager = new FakeViewManager();
Expand Down

0 comments on commit 2e90f49

Please sign in to comment.