|
39 | 39 | import com.linkedin.datastream.common.DatastreamRuntimeException; |
40 | 40 | import com.linkedin.datastream.common.DatastreamSource; |
41 | 41 | import com.linkedin.datastream.common.DatastreamUtils; |
| 42 | +import com.linkedin.datastream.common.PollUtils; |
42 | 43 | import com.linkedin.datastream.common.VerifiableProperties; |
43 | 44 | import com.linkedin.datastream.metrics.BrooklinMetricInfo; |
44 | 45 | import com.linkedin.datastream.server.DatastreamTask; |
@@ -71,6 +72,8 @@ public class KafkaTransportProviderAdmin implements TransportProviderAdmin { |
71 | 72 | private static final int DEFAULT_NUMBER_PARTITIONS = 1; |
72 | 73 | private static final String DEFAULT_MIN_INSYNC_REPLICAS_CONFIG_VALUE = "2"; |
73 | 74 | private static final String METADATA_KAFKA_BROKERS = DatastreamMetadataConstants.SYSTEM_DESTINATION_PREFIX + "KafkaBrokers"; |
| 75 | + private static final int GET_RETENTION_RETRY_PERIOD_MS = 2000; |
| 76 | + private static final int GET_RETENTION_RETRY_TIMEOUT_MS = 10000; |
74 | 77 |
|
75 | 78 | private final String _transportProviderMetricsNamesPrefix; |
76 | 79 | private final int _numProducersPerConnector; |
@@ -250,23 +253,26 @@ public Duration getRetention(Datastream datastream) { |
250 | 253 | Validate.notNull(destination, "null destination URI"); |
251 | 254 | String topicName = KafkaTransportProviderUtils.getTopicName(destination); |
252 | 255 |
|
253 | | - try { |
254 | | - // TODO: In rare cases it may happen that even though topic has been created, its metadata hasn't synced |
255 | | - // to all the brokers yet and adminClient might query such a broker in which case topic retention will |
256 | | - // not be present in the topic config. To circumvent this we need to query only the controller node in |
257 | | - // Kafka cluster which will always return successful result, but that is more involved. |
258 | | - ConfigResource topicResource = new ConfigResource(ConfigResource.Type.TOPIC, topicName); |
259 | | - Map<ConfigResource, Config> topicConfigMap = adminClient.describeConfigs(Collections.singletonList(topicResource)).all().get(); |
260 | | - Config topicConfig = topicConfigMap.get(topicResource); |
261 | | - ConfigEntry entry = topicConfig.get(TOPIC_RETENTION_MS); |
262 | | - if (entry != null) { |
263 | | - return Duration.ofMillis(Long.parseLong(entry.value())); |
| 256 | + // In rare cases it may happen that even though topic has been created, its metadata hasn't synced |
| 257 | + // to all the brokers yet and adminClient might query such a broker in which case topic retention will |
| 258 | + // not be present in the topic config. Therefore we retry a few times before giving up. |
| 259 | + return PollUtils.poll(() -> { |
| 260 | + try { |
| 261 | + ConfigResource topicResource = new ConfigResource(ConfigResource.Type.TOPIC, topicName); |
| 262 | + Map<ConfigResource, Config> topicConfigMap = |
| 263 | + adminClient.describeConfigs(Collections.singletonList(topicResource)).all().get(); |
| 264 | + Config topicConfig = topicConfigMap.get(topicResource); |
| 265 | + ConfigEntry entry = topicConfig.get(TOPIC_RETENTION_MS); |
| 266 | + if (entry != null) { |
| 267 | + LOG.info("Retention time for topic {} is {}", topicName, entry.value()); |
| 268 | + return Duration.ofMillis(Long.parseLong(entry.value())); |
| 269 | + } |
| 270 | + } catch (ExecutionException | InterruptedException e) { |
| 271 | + LOG.warn("Failed to retrieve config for topic {}.", topicName, e); |
264 | 272 | } |
265 | | - } catch (InterruptedException | ExecutionException e) { |
266 | | - LOG.warn("Failed to retrieve config for topic {}.", topicName, e); |
267 | | - } |
268 | | - |
269 | | - return null; |
| 273 | + LOG.warn("Failed to retrieve retention time for topic {}", topicName); |
| 274 | + return null; |
| 275 | + }, Objects::nonNull, GET_RETENTION_RETRY_PERIOD_MS, GET_RETENTION_RETRY_TIMEOUT_MS).orElse(null); |
270 | 276 | } |
271 | 277 |
|
272 | 278 | /** |
|
0 commit comments