Skip to content

Commit 5251219

Browse files
authored
feat(spanner): Cleanup GcpFallbackChannel creation and enable by default (#12707)
* Clean up setupGcpFallback by using new getDecoratedChannelBuilder instead of capturing cloudpath ChannelConfigurator * Set EEF period to 3 minutes * Enable fallback by default (still only used when direct access is enabled)
1 parent f1faf60 commit 5251219

3 files changed

Lines changed: 92 additions & 129 deletions

File tree

java-spanner/google-cloud-spanner/src/main/java/com/google/cloud/spanner/SpannerOptions.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,7 @@ default boolean isEnableDirectAccess() {
10411041
}
10421042

10431043
default boolean isEnableGcpFallback() {
1044-
return false;
1044+
return true;
10451045
}
10461046

10471047
default boolean isEnableBuiltInMetrics() {
@@ -1136,7 +1136,8 @@ public boolean isEnableDirectAccess() {
11361136

11371137
@Override
11381138
public boolean isEnableGcpFallback() {
1139-
return Boolean.parseBoolean(System.getenv(GOOGLE_SPANNER_ENABLE_GCP_FALLBACK));
1139+
String enableGcpFallback = System.getenv(GOOGLE_SPANNER_ENABLE_GCP_FALLBACK);
1140+
return enableGcpFallback == null ? true : Boolean.parseBoolean(enableGcpFallback);
11401141
}
11411142

11421143
@Override

java-spanner/google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpc.java

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@
230230
import java.util.concurrent.Future;
231231
import java.util.concurrent.ScheduledExecutorService;
232232
import java.util.concurrent.TimeUnit;
233-
import java.util.concurrent.atomic.AtomicReference;
234233
import java.util.stream.Collectors;
235234
import java.util.stream.Stream;
236235
import javax.annotation.Nullable;
@@ -587,6 +586,7 @@ GcpFallbackChannelOptions createFallbackChannelOptions(
587586
.setPrimaryChannelName("directpath")
588587
.setFallbackChannelName("cloudpath")
589588
.setMinFailedCalls(minFailedCalls)
589+
.setPeriod(Duration.ofMinutes(3))
590590
.setGcpFallbackOpenTelemetry(fallbackTelemetry)
591591
.build();
592592
}
@@ -659,27 +659,13 @@ private void setupGcpFallback(
659659
createChannelProviderBuilder(
660660
options, headerProviderWithUserAgent, /* isEnableDirectAccess= */ false);
661661

662-
final ApiFunction<ManagedChannelBuilder, ManagedChannelBuilder> existingCloudPathConfigurator =
663-
cloudPathProviderBuilder.getChannelConfigurator();
664-
final AtomicReference<ManagedChannelBuilder> cloudPathBuilderRef = new AtomicReference<>();
665-
cloudPathProviderBuilder.setChannelConfigurator(
666-
builder -> {
667-
ManagedChannelBuilder effectiveBuilder = builder;
668-
if (existingCloudPathConfigurator != null) {
669-
effectiveBuilder = existingCloudPathConfigurator.apply(effectiveBuilder);
670-
}
671-
cloudPathBuilderRef.set(effectiveBuilder);
672-
return effectiveBuilder;
673-
});
674-
675-
// Build the cloudPathProvider to extract the builder which will be provided to
676-
// FallbackChannelBuilder.
677-
try (TransportChannel ignored = cloudPathProviderBuilder.build().getTransportChannel()) {
678-
} catch (Exception e) {
662+
InstantiatingGrpcChannelProvider cloudPathProvider = cloudPathProviderBuilder.build();
663+
ManagedChannelBuilder cloudPathBuilder;
664+
try {
665+
cloudPathBuilder = cloudPathProvider.createDecoratedChannelBuilder();
666+
} catch (IOException e) {
679667
throw asSpannerException(e);
680668
}
681-
682-
ManagedChannelBuilder cloudPathBuilder = cloudPathBuilderRef.get();
683669
if (cloudPathBuilder == null) {
684670
throw new IllegalStateException("CloudPath builder was not captured.");
685671
}

java-spanner/google-cloud-spanner/src/test/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpcTest.java

Lines changed: 83 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,61 +1442,49 @@ public void testFallbackIntegration_doesNotSwitchWhenThresholdNotMet() throws Ex
14421442
OpenTelemetrySdk openTelemetry =
14431443
OpenTelemetrySdk.builder().setMeterProvider(meterProvider).build();
14441444

1445-
SpannerOptions.useEnvironment(
1446-
new SpannerOptions.SpannerEnvironment() {
1447-
@Override
1448-
public boolean isEnableGcpFallback() {
1449-
return true;
1450-
}
1451-
});
1445+
SpannerOptions.Builder builder =
1446+
SpannerOptions.newBuilder()
1447+
.setProjectId("test-project")
1448+
.setEnableDirectAccess(true)
1449+
.setHost("http://localhost:1") // Closed port
1450+
.setCredentials(NoCredentials.getInstance())
1451+
.setOpenTelemetry(openTelemetry);
1452+
// Make sure the ExecuteBatchDml RPC fails quickly to keep the test fast.
1453+
// Note that the timeout is actually not used. It is the fact that it does not retry that
1454+
// makes it fail fast.
1455+
builder
1456+
.getSpannerStubSettingsBuilder()
1457+
.executeBatchDmlSettings()
1458+
.setSimpleTimeoutNoRetriesDuration(Duration.ofSeconds(10));
1459+
// Setup Options with invalid host to force error
1460+
SpannerOptions options = builder.build();
1461+
1462+
TestableGapicSpannerRpc rpc = new TestableGapicSpannerRpc(options);
14521463
try {
1453-
SpannerOptions.Builder builder =
1454-
SpannerOptions.newBuilder()
1455-
.setProjectId("test-project")
1456-
.setEnableDirectAccess(true)
1457-
.setHost("http://localhost:1") // Closed port
1458-
.setCredentials(NoCredentials.getInstance())
1459-
.setOpenTelemetry(openTelemetry);
1460-
// Make sure the ExecuteBatchDml RPC fails quickly to keep the test fast.
1461-
// Note that the timeout is actually not used. It is the fact that it does not retry that
1462-
// makes it fail fast.
1463-
builder
1464-
.getSpannerStubSettingsBuilder()
1465-
.executeBatchDmlSettings()
1466-
.setSimpleTimeoutNoRetriesDuration(Duration.ofSeconds(10));
1467-
// Setup Options with invalid host to force error
1468-
SpannerOptions options = builder.build();
1469-
1470-
TestableGapicSpannerRpc rpc = new TestableGapicSpannerRpc(options);
1471-
try {
1472-
// Make a call that is expected to fail
1473-
SpannerException exception =
1474-
assertThrows(
1475-
SpannerException.class,
1476-
() ->
1477-
rpc.executeBatchDml(
1478-
com.google.spanner.v1.ExecuteBatchDmlRequest.newBuilder()
1479-
.setSession("projects/p/instances/i/databases/d/sessions/s")
1480-
.build(),
1481-
null));
1482-
assertEquals(ErrorCode.UNAVAILABLE, exception.getErrorCode());
1483-
1484-
// Wait briefly for the 10ms period to trigger the fallback check
1485-
Thread.sleep(10);
1486-
1487-
// Verify Fallback via Metrics
1488-
Collection<MetricData> metrics = metricReader.collectAllMetrics();
1489-
boolean fallbackOccurred =
1490-
metrics.stream()
1491-
.anyMatch(md -> md.getName().contains("fallback_count") && hasValue(md));
1492-
1493-
assertFalse("Fallback metric should not be present", fallbackOccurred);
1464+
// Make a call that is expected to fail
1465+
SpannerException exception =
1466+
assertThrows(
1467+
SpannerException.class,
1468+
() ->
1469+
rpc.executeBatchDml(
1470+
com.google.spanner.v1.ExecuteBatchDmlRequest.newBuilder()
1471+
.setSession("projects/p/instances/i/databases/d/sessions/s")
1472+
.build(),
1473+
null));
1474+
assertEquals(ErrorCode.UNAVAILABLE, exception.getErrorCode());
1475+
1476+
// Wait briefly for the 10ms period to trigger the fallback check
1477+
Thread.sleep(10);
1478+
1479+
// Verify Fallback via Metrics
1480+
Collection<MetricData> metrics = metricReader.collectAllMetrics();
1481+
boolean fallbackOccurred =
1482+
metrics.stream().anyMatch(md -> md.getName().contains("fallback_count") && hasValue(md));
1483+
1484+
assertFalse("Fallback metric should not be present", fallbackOccurred);
14941485

1495-
} finally {
1496-
rpc.shutdown();
1497-
}
14981486
} finally {
1499-
SpannerOptions.useDefaultEnvironment();
1487+
rpc.shutdown();
15001488
}
15011489
}
15021490

@@ -1533,64 +1521,52 @@ public void testFallbackIntegration_switchesToFallbackOnFailure() throws Excepti
15331521
OpenTelemetrySdk openTelemetry =
15341522
OpenTelemetrySdk.builder().setMeterProvider(meterProvider).build();
15351523

1536-
SpannerOptions.useEnvironment(
1537-
new SpannerOptions.SpannerEnvironment() {
1538-
@Override
1539-
public boolean isEnableGcpFallback() {
1540-
return true;
1541-
}
1542-
});
1524+
SpannerOptions.Builder builder =
1525+
SpannerOptions.newBuilder()
1526+
.setProjectId("test-project")
1527+
.setEnableDirectAccess(true)
1528+
.setHost("http://localhost:1") // Closed port
1529+
.setCredentials(NoCredentials.getInstance())
1530+
.setOpenTelemetry(openTelemetry);
1531+
// Make sure the ExecuteBatchDml RPC fails quickly to keep the test fast.
1532+
// Note that the timeout is actually not used. It is the fact that it does not retry that
1533+
// makes it fail fast.
1534+
builder
1535+
.getSpannerStubSettingsBuilder()
1536+
.executeBatchDmlSettings()
1537+
.setSimpleTimeoutNoRetriesDuration(Duration.ofSeconds(10));
1538+
// Setup Options with invalid host to force error
1539+
SpannerOptions options = builder.build();
1540+
1541+
TestableGapicSpannerRpcWithLowerMinFailedCalls rpc =
1542+
new TestableGapicSpannerRpcWithLowerMinFailedCalls(options);
15431543
try {
1544-
SpannerOptions.Builder builder =
1545-
SpannerOptions.newBuilder()
1546-
.setProjectId("test-project")
1547-
.setEnableDirectAccess(true)
1548-
.setHost("http://localhost:1") // Closed port
1549-
.setCredentials(NoCredentials.getInstance())
1550-
.setOpenTelemetry(openTelemetry);
1551-
// Make sure the ExecuteBatchDml RPC fails quickly to keep the test fast.
1552-
// Note that the timeout is actually not used. It is the fact that it does not retry that
1553-
// makes it fail fast.
1554-
builder
1555-
.getSpannerStubSettingsBuilder()
1556-
.executeBatchDmlSettings()
1557-
.setSimpleTimeoutNoRetriesDuration(Duration.ofSeconds(10));
1558-
// Setup Options with invalid host to force error
1559-
SpannerOptions options = builder.build();
1560-
1561-
TestableGapicSpannerRpcWithLowerMinFailedCalls rpc =
1562-
new TestableGapicSpannerRpcWithLowerMinFailedCalls(options);
1563-
try {
1564-
// Make a call that is expected to fail
1565-
SpannerException exception =
1566-
assertThrows(
1567-
SpannerException.class,
1568-
() ->
1569-
rpc.executeBatchDml(
1570-
com.google.spanner.v1.ExecuteBatchDmlRequest.newBuilder()
1571-
.setSession("projects/p/instances/i/databases/d/sessions/s")
1572-
.build(),
1573-
null));
1574-
assertEquals(ErrorCode.UNAVAILABLE, exception.getErrorCode());
1575-
1576-
// Wait briefly for the 10ms period to trigger the fallback check
1577-
Thread.sleep(10);
1578-
1579-
// Verify Fallback via Metrics
1580-
Collection<MetricData> metrics = metricReader.collectAllMetrics();
1581-
boolean fallbackOccurred =
1582-
metrics.stream()
1583-
.anyMatch(md -> md.getName().contains("fallback_count") && hasValue(md));
1584-
1585-
assertTrue(
1586-
"Fallback metric should be present, indicating GcpFallbackChannel is active",
1587-
fallbackOccurred);
1544+
// Make a call that is expected to fail
1545+
SpannerException exception =
1546+
assertThrows(
1547+
SpannerException.class,
1548+
() ->
1549+
rpc.executeBatchDml(
1550+
com.google.spanner.v1.ExecuteBatchDmlRequest.newBuilder()
1551+
.setSession("projects/p/instances/i/databases/d/sessions/s")
1552+
.build(),
1553+
null));
1554+
assertEquals(ErrorCode.UNAVAILABLE, exception.getErrorCode());
1555+
1556+
// Wait briefly for the 10ms period to trigger the fallback check
1557+
Thread.sleep(10);
1558+
1559+
// Verify Fallback via Metrics
1560+
Collection<MetricData> metrics = metricReader.collectAllMetrics();
1561+
boolean fallbackOccurred =
1562+
metrics.stream().anyMatch(md -> md.getName().contains("fallback_count") && hasValue(md));
1563+
1564+
assertTrue(
1565+
"Fallback metric should be present, indicating GcpFallbackChannel is active",
1566+
fallbackOccurred);
15881567

1589-
} finally {
1590-
rpc.shutdown();
1591-
}
15921568
} finally {
1593-
SpannerOptions.useDefaultEnvironment();
1569+
rpc.shutdown();
15941570
}
15951571
}
15961572

0 commit comments

Comments
 (0)