From 7173494eafdc0ddaaf5da4f29beb10adb888d8c3 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 29 May 2026 14:39:43 +0000 Subject: [PATCH 01/20] Fixes UnsupportedOperationException when using readManyByPartitionKey for empty pages --- ...nyByPartitionKeyContinuationTokenTest.java | 24 +++++++++ .../UtilsImmutableMapTests.java | 52 +++++++++++++++++++ .../azure/cosmos/implementation/Utils.java | 27 ++++++++++ .../com/azure/cosmos/models/FeedResponse.java | 27 +++++----- 4 files changed, 117 insertions(+), 13 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UtilsImmutableMapTests.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java index 86057a353021..7c31a8ea2565 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java @@ -150,6 +150,30 @@ public void setFeedResponseContinuationToken_handlesEmptyHeadersWithoutCopyingNo assertThat(normalResponse.getResponseHeaders()).isSameAs(normalHeaders); } + /** + * Reproduces the customer-reported failure path: the parallel query pipeline's + * "artificial empty page" branch (ParallelDocumentQueryExecutionContext.headerResponse) + * emits a FeedResponse whose header map is {@code Utils.immutableMapOf(...)} - i.e. a + * non-empty {@code Collections.unmodifiableMap} wrapper. When such a page reaches the + * readManyByPartitionKeys stamping lambda, setFeedResponseContinuationToken attempts + * to put the composite token into the immutable map, throwing UnsupportedOperationException. + */ + @Test(groups = { "unit" }) + public void setFeedResponseContinuationToken_immutableNonEmptyHeaders_doesNotThrow() { + Map immutableSingleEntryHeaders = + Utils.immutableMapOf(HttpConstants.HttpHeaders.REQUEST_CHARGE, "1.23"); + FeedResponse response = ModelBridgeInternal.createFeedResponse( + Collections.emptyList(), + immutableSingleEntryHeaders); + + ModelBridgeInternal.setFeedResponseContinuationToken("composite-token", response); + + assertThat(response.getContinuationToken()).isEqualTo("composite-token"); + assertThat(response.getResponseHeaders()) + .containsEntry(HttpConstants.HttpHeaders.CONTINUATION, "composite-token") + .containsEntry(HttpConstants.HttpHeaders.REQUEST_CHARGE, "1.23"); + } + @Test(groups = { "unit" }) public void deserialize_malformedInput_throws() { // Either the base64 decoder or the JSON parsing layer rejects garbage; both raise diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UtilsImmutableMapTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UtilsImmutableMapTests.java new file mode 100644 index 000000000000..f0524146dbb9 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/UtilsImmutableMapTests.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. + */ + +package com.azure.cosmos.implementation; + +import org.testng.annotations.Test; + +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for {@link Utils#immutableMapOf(Object, Object)} and the matching detector + * {@link Utils#isImmutableMap(Map)}. These live next to the factory so that any future + * change to the factory's runtime class shape is caught by the same regression suite. + */ +public class UtilsImmutableMapTests { + + @Test(groups = { "unit" }) + public void immutableMapOf_isDetectedAsImmutable() { + Map m = Utils.immutableMapOf("k", "v"); + assertThat(Utils.isImmutableMap(m)).isTrue(); + } + + @Test(groups = { "unit" }) + public void emptyMap_isDetectedAsImmutable() { + assertThat(Utils.isImmutableMap(Collections.emptyMap())).isTrue(); + } + + @Test(groups = { "unit" }) + public void hashMap_isNotDetectedAsImmutable() { + assertThat(Utils.isImmutableMap(new HashMap<>())).isFalse(); + Map populated = new HashMap<>(); + populated.put("k", "v"); + assertThat(Utils.isImmutableMap(populated)).isFalse(); + } + + @Test(groups = { "unit" }) + public void linkedHashMap_isNotDetectedAsImmutable() { + assertThat(Utils.isImmutableMap(new LinkedHashMap<>())).isFalse(); + } + + @Test(groups = { "unit" }) + public void nullMap_isNotDetectedAsImmutable() { + assertThat(Utils.isImmutableMap(null)).isFalse(); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Utils.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Utils.java index 0b01e87d985b..1e57ff4bedca 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Utils.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Utils.java @@ -574,6 +574,33 @@ public static List immutableListOf() { return map; } + // Runtime classes produced by the immutable factory methods above. Captured once at + // class-init time so that callers can perform an O(1) reference-equality check to + // decide whether they need a defensive mutable copy, without resorting to + // exception-driven probing on the hot path. + private static final Class UNMODIFIABLE_MAP_CLASS = + Collections.unmodifiableMap(new HashMap<>()).getClass(); + private static final Class EMPTY_MAP_CLASS = Collections.emptyMap().getClass(); + + /** + * Returns {@code true} if {@code map} is one of the immutable map shapes produced by + * the factory methods in this class ({@link #immutableMapOf(Object, Object)}) or by + * {@link Collections#emptyMap()}. The check is a single reference comparison and is + * safe to call from hot paths. + *

+ * Note: this is intentionally narrow - it only recognizes the wrappers the Cosmos + * pipeline actually emits. It does not attempt to recognize every possible JDK or + * third-party immutable map (e.g. {@code Map.of(...)}, Guava {@code ImmutableMap}). + * Add new sentinels here if a new immutable producer is introduced. + */ + public static boolean isImmutableMap(Map map) { + if (map == null) { + return false; + } + Class clazz = map.getClass(); + return clazz == UNMODIFIABLE_MAP_CLASS || clazz == EMPTY_MAP_CLASS; + } + public static V firstOrDefault(List list) { return list.size() > 0? list.get(0) : null ; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java index da8764c95f85..a62720d7970e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java @@ -16,6 +16,7 @@ import com.azure.cosmos.implementation.QueryMetricsConstants; import com.azure.cosmos.implementation.RxDocumentServiceResponse; import com.azure.cosmos.implementation.Strings; +import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.query.queryadvisor.QueryAdvice; import com.azure.cosmos.implementation.query.QueryInfo; @@ -44,7 +45,7 @@ private static ImplementationBridgeHelpers.CosmosDiagnosticsHelper.CosmosDiagnos private static final Pattern DELIMITER_CHARS_PATTERN = Pattern.compile(Constants.Quota.DELIMITER_CHARS); private final List results; - private final Map header; + private Map header; private final HashMap usageHeaders; private final HashMap quotaHeaders; private final boolean useEtagAsContinuation; @@ -449,19 +450,19 @@ void setContinuationToken(String continuationToken) { } private void setContinuationTokenInternal(String headerName, String continuationToken) { - if (!Strings.isNullOrWhiteSpace(continuationToken)) { + boolean setting = !Strings.isNullOrWhiteSpace(continuationToken); + boolean clearing = !setting && !this.header.isEmpty() && this.header.containsKey(headerName); + if (!setting && !clearing) { + return; + } + + if (Utils.isImmutableMap(this.header)) { + this.header = new HashMap<>(this.header); + } + + if (setting) { this.header.put(headerName, continuationToken); - } else if (!this.header.isEmpty() && this.header.containsKey(headerName)) { - // The query API returns unmodifiable header collections for empty - // responses (no documents returned - when only header set is request charge) - // the protection here to check for existence of the header before attempting - // to remove it would not be robust enough against unknown headers - // but since we only ever call our own query pipeline - // avoiding cloning in all cases and gating on continuation header - // existence is a reasonable trade-off - test coverage exists that uncovered - // the problem - so, this acts as regression test as well - // --> the test coverage is in ItemsPartitionReaderWithReadManyByPartitionKeyITest - // it should "return empty results for non-existent partition keys" + } else { this.header.remove(headerName); } } From a442db365fc0fd4df7d5050b6998d56ea2e83f1f Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Fri, 29 May 2026 14:49:43 +0000 Subject: [PATCH 02/20] Updated changelogs --- sdk/cosmos/azure-cosmos-spark_3-3_2-12/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos-spark_3-4_2-12/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos-spark_4-1_2-13/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos/CHANGELOG.md | 1 + 7 files changed, 7 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-spark_3-3_2-12/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-3_2-12/CHANGELOG.md index 5045991f3542..acaee40c6cd9 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-3_2-12/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_3-3_2-12/CHANGELOG.md @@ -8,6 +8,7 @@ #### Bugs Fixed * Improved partition planning performance for change feed with large number of feed ranges. - See [PR 49086](https://github.com/Azure/azure-sdk-for-java/pull/49086) +* Fixed `UnsupportedOperationException` when using `readManyByPartitionKeys` for empty pages. - See [PR 49311](https://github.com/Azure/azure-sdk-for-java/pull/49311) #### Other Changes diff --git a/sdk/cosmos/azure-cosmos-spark_3-4_2-12/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-4_2-12/CHANGELOG.md index 777177708547..09b19a4119c5 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-4_2-12/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_3-4_2-12/CHANGELOG.md @@ -8,6 +8,7 @@ #### Bugs Fixed * Improved partition planning performance for change feed with large number of feed ranges. - See [PR 49086](https://github.com/Azure/azure-sdk-for-java/pull/49086) +* Fixed `UnsupportedOperationException` when using `readManyByPartitionKeys` for empty pages. - See [PR 49311](https://github.com/Azure/azure-sdk-for-java/pull/49311) #### Other Changes diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md index 1360614308a3..a36a8521c71b 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md @@ -8,6 +8,7 @@ #### Bugs Fixed * Improved partition planning performance for change feed with large number of feed ranges. - See [PR 49086](https://github.com/Azure/azure-sdk-for-java/pull/49086) +* Fixed `UnsupportedOperationException` when using `readManyByPartitionKeys` for empty pages. - See [PR 49311](https://github.com/Azure/azure-sdk-for-java/pull/49311) #### Other Changes diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md index 6674b6f8bb74..5d46eb30dde5 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md @@ -8,6 +8,7 @@ #### Bugs Fixed * Improved partition planning performance for change feed with large number of feed ranges. - See [PR 49086](https://github.com/Azure/azure-sdk-for-java/pull/49086) +* Fixed `UnsupportedOperationException` when using `readManyByPartitionKeys` for empty pages. - See [PR 49311](https://github.com/Azure/azure-sdk-for-java/pull/49311) #### Other Changes diff --git a/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md index 9b87a3fcf675..9b971d9786c0 100644 --- a/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md @@ -8,6 +8,7 @@ #### Bugs Fixed * Improved partition planning performance for change feed with large number of feed ranges. - See [PR 49086](https://github.com/Azure/azure-sdk-for-java/pull/49086) +* Fixed `UnsupportedOperationException` when using `readManyByPartitionKeys` for empty pages. - See [PR 49311](https://github.com/Azure/azure-sdk-for-java/pull/49311) #### Other Changes diff --git a/sdk/cosmos/azure-cosmos-spark_4-1_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_4-1_2-13/CHANGELOG.md index 570aec149b2c..b3c35541850b 100644 --- a/sdk/cosmos/azure-cosmos-spark_4-1_2-13/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_4-1_2-13/CHANGELOG.md @@ -8,6 +8,7 @@ #### Bugs Fixed * Improved partition planning performance for change feed with large number of feed ranges. - See [PR 49086](https://github.com/Azure/azure-sdk-for-java/pull/49086) +* Fixed `UnsupportedOperationException` when using `readManyByPartitionKeys` for empty pages. - See [PR 49311](https://github.com/Azure/azure-sdk-for-java/pull/49311) #### Other Changes diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 6a5ba9e04600..e1efa993fa73 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -8,6 +8,7 @@ #### Breaking Changes #### Bugs Fixed +* Fixed `UnsupportedOperationException` when using `readManyByPartitionKeys` for empty pages. - See [PR 49311](https://github.com/Azure/azure-sdk-for-java/pull/49311) #### Other Changes * Replaced per-client `Schedulers.newSingle()` schedulers in `GlobalEndpointManager` and `GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker` with shared `BoundedElastic` schedulers in `CosmosSchedulers` to prevent thread count from scaling linearly with client/tenant count. - See [PR 49062](https://github.com/Azure/azure-sdk-for-java/pull/49062) From 364d93156e846e440ff56b28968224f17a183770 Mon Sep 17 00:00:00 2001 From: Annie Liang Date: Fri, 29 May 2026 10:22:19 -0700 Subject: [PATCH 03/20] Update spark.yml JarStorageAccountName for ephemeral tenant rotation 202605 Update JarStorageAccountName from oltpsparkcijarstore0326 to oltpsparkcijarstore0529 to point to the new storage account provisioned in the current ephemeral tenant rotation. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/cosmos/spark.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/spark.yml b/sdk/cosmos/spark.yml index 91e8bfea69c6..05e5bd58232b 100644 --- a/sdk/cosmos/spark.yml +++ b/sdk/cosmos/spark.yml @@ -28,7 +28,7 @@ stages: JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) JarReadOnlySasUriIndex: 0 - JarStorageAccountName: 'oltpsparkcijarstore0326' + JarStorageAccountName: 'oltpsparkcijarstore0529' JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: @@ -50,7 +50,7 @@ stages: JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) JarReadOnlySasUriIndex: 0 - JarStorageAccountName: 'oltpsparkcijarstore0326' + JarStorageAccountName: 'oltpsparkcijarstore0529' JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: @@ -73,7 +73,7 @@ stages: JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) JarReadOnlySasUriIndex: 0 - JarStorageAccountName: 'oltpsparkcijarstore0326' + JarStorageAccountName: 'oltpsparkcijarstore0529' JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: @@ -96,7 +96,7 @@ stages: JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) JarReadOnlySasUriIndex: 0 - JarStorageAccountName: 'oltpsparkcijarstore0326' + JarStorageAccountName: 'oltpsparkcijarstore0529' JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: @@ -119,7 +119,7 @@ stages: JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) JarReadOnlySasUriIndex: 1 - JarStorageAccountName: 'oltpsparkcijarstore0326' + JarStorageAccountName: 'oltpsparkcijarstore0529' JarName: 'azure-cosmos-spark_3-5_2-13-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: @@ -142,7 +142,7 @@ stages: JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) JarReadOnlySasUriIndex: 2 - JarStorageAccountName: 'oltpsparkcijarstore0326' + JarStorageAccountName: 'oltpsparkcijarstore0529' JarName: 'azure-cosmos-spark_4-0_2-13-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: @@ -165,5 +165,5 @@ stages: JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) JarReadOnlySasUriIndex: 3 - JarStorageAccountName: 'oltpsparkcijarstore0326' + JarStorageAccountName: 'oltpsparkcijarstore0529' JarName: 'azure-cosmos-spark_4-1_2-13-latest-ci-candidate.jar' From 671d37d7ec0bee8ffefb839a90ddd8b9d1d0d1d2 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Sun, 31 May 2026 23:38:12 +0000 Subject: [PATCH 04/20] Keeping FeedResponse.header final --- ...nyByPartitionKeyContinuationTokenTest.java | 9 ++-- .../com/azure/cosmos/models/FeedResponse.java | 42 ++++++++----------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java index 7c31a8ea2565..8afec5905d11 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ReadManyByPartitionKeyContinuationTokenTest.java @@ -127,17 +127,20 @@ public void roundtrip_lastBatchNoContinuation() { @Test(groups = { "unit" }) public void setFeedResponseContinuationToken_handlesEmptyHeadersWithoutCopyingNormalCase() { - Map immutableEmptyHeaders = Collections.emptyMap(); + // Immutable inputs are normalized to a mutable map at FeedResponse construction + // time (so the field stays final). Clearing a continuation token on an empty + // header map is a no-op and must not throw. FeedResponse emptyResponse = ModelBridgeInternal.createFeedResponse( Collections.emptyList(), - immutableEmptyHeaders); + Collections.emptyMap()); ModelBridgeInternal.setFeedResponseContinuationToken(null, emptyResponse); assertThat(emptyResponse.getContinuationToken()).isNull(); - assertThat(emptyResponse.getResponseHeaders()).isSameAs(immutableEmptyHeaders); assertThat(emptyResponse.getResponseHeaders()).isEmpty(); + // Mutable header maps are passed through without copying, preserving the + // reference returned by getResponseHeaders(). Map normalHeaders = new HashMap<>(); normalHeaders.put(HttpConstants.HttpHeaders.ACTIVITY_ID, "test-activity-id"); FeedResponse normalResponse = ModelBridgeInternal.createFeedResponse( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java index a62720d7970e..52792049d325 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java @@ -45,7 +45,7 @@ private static ImplementationBridgeHelpers.CosmosDiagnosticsHelper.CosmosDiagnos private static final Pattern DELIMITER_CHARS_PATTERN = Pattern.compile(Constants.Quota.DELIMITER_CHARS); private final List results; - private Map header; + private final Map header; private final HashMap usageHeaders; private final HashMap quotaHeaders; private final boolean useEtagAsContinuation; @@ -56,19 +56,21 @@ private static ImplementationBridgeHelpers.CosmosDiagnosticsHelper.CosmosDiagnos private QueryInfo queryInfo; private QueryInfo.QueryPlanDiagnosticsContext queryPlanDiagnosticsContext; - // All header maps are produced by the SDK's own query pipeline. Non-null maps - // are always mutable (HashMap or ConcurrentHashMap) - the SDK intentionally - // allows callers to add/modify headers on FeedResponse. The only known - // exception is empty-page responses where the query pipeline may pass null. - // We do NOT clone non-null maps here to avoid unnecessary allocations on every - // FeedResponse construction - the wider blast radius of cloning (every query, - // change feed, readMany response) is not justified by the narrow null case. - // If a future code path introduces an immutable non-null header map, the - // setContinuationTokenInternal method will fail fast with - // UnsupportedOperationException, and the fix should be to make the upstream - // pipeline emit a mutable map rather than adding defensive cloning here. + // The header map stored on FeedResponse must be mutable: downstream stages + // (e.g. the readManyByPartitionKeys stamping lambda) may add or replace + // headers in place. Normalize at construction time so the field can stay + // final and getResponseHeaders() consistently returns the same instance. + // Mutable inputs are passed through without copying; null and the known + // immutable shapes produced by Utils.immutableMapOf / Collections.emptyMap + // are replaced with a fresh HashMap (preserving entries). private static Map ensureMutableHeadersMap(Map headers) { - return headers == null ? new HashMap<>() : headers; + if (headers == null) { + return new HashMap<>(); + } + if (Utils.isImmutableMap(headers)) { + return new HashMap<>(headers); + } + return headers; } FeedResponse(List results, Map headers) { @@ -450,19 +452,9 @@ void setContinuationToken(String continuationToken) { } private void setContinuationTokenInternal(String headerName, String continuationToken) { - boolean setting = !Strings.isNullOrWhiteSpace(continuationToken); - boolean clearing = !setting && !this.header.isEmpty() && this.header.containsKey(headerName); - if (!setting && !clearing) { - return; - } - - if (Utils.isImmutableMap(this.header)) { - this.header = new HashMap<>(this.header); - } - - if (setting) { + if (!Strings.isNullOrWhiteSpace(continuationToken)) { this.header.put(headerName, continuationToken); - } else { + } else if (!this.header.isEmpty() && this.header.containsKey(headerName)) { this.header.remove(headerName); } } From 03be254089c598036a1fe3c1add3bd794c7d8b3a Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 09:18:07 +0000 Subject: [PATCH 05/20] Unifying 449 to be retried client-side across Gateway modes --- ...njectionServerErrorRuleOnGatewayTests.java | 2 +- ...ectionServerErrorRuleOnGatewayV2Tests.java | 2 +- .../GatewayRetryWithRetryPolicyTest.java | 93 +++++++++++++++++ .../RxGatewayStoreModelTest.java | 91 +++++++++++++++++ .../GoneAndRetryWithRetryPolicyTest.java | 4 +- .../GatewayRetryWithRetryPolicy.java | 58 +++++++++++ .../cosmos/implementation/HttpConstants.java | 1 + .../implementation/RetryWithRetryPolicy.java | 99 +++++++++++++++++++ .../implementation/RxGatewayStoreModel.java | 30 +++++- .../implementation/ThinClientStoreModel.java | 5 + .../GoneAndRetryWithRetryPolicy.java | 90 ++--------------- 11 files changed, 387 insertions(+), 88 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryWithRetryPolicy.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java index f2636f313f83..6d23fe732342 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java @@ -123,7 +123,7 @@ public static Object[][] faultInjectionServerErrorResponseProvider() { return new Object[][]{ // faultInjectionServerError, will SDK retry, check for multi-region setup, errorStatusCode, errorSubStatusCode { FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR, false, false, 500, 0 }, - { FaultInjectionServerErrorType.RETRY_WITH, false, false, 449, 0 }, + { FaultInjectionServerErrorType.RETRY_WITH, true, false, 449, 0 }, { FaultInjectionServerErrorType.TOO_MANY_REQUEST, true, false, 429, HttpConstants.SubStatusCodes.USER_REQUEST_RATE_TOO_LARGE }, { FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE, true, false, 404, 1002 }, { FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, true, true, 503, 21008 } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java index b018161dcd0f..53c373d1a101 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java @@ -116,7 +116,7 @@ public static Object[][] faultInjectionServerErrorResponseProvider() { return new Object[][]{ // faultInjectionServerError, will SDK retry, check for multi-region setup, errorStatusCode, errorSubStatusCode { FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR, false, false, 500, 0 }, - { FaultInjectionServerErrorType.RETRY_WITH, false, false, 449, 0 }, + { FaultInjectionServerErrorType.RETRY_WITH, true, false, 449, 0 }, { FaultInjectionServerErrorType.TOO_MANY_REQUEST, true, false, 429, HttpConstants.SubStatusCodes.USER_REQUEST_RATE_TOO_LARGE }, { FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE, true, false, 404, 1002 }, { FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, true, true, 503, 21008 } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java new file mode 100644 index 000000000000..a446a22b9af4 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import com.azure.cosmos.BridgeInternal; +import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.routing.RegionalRoutingContext; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import java.net.URI; + +import static com.azure.cosmos.implementation.TestUtils.mockDiagnosticsClientContext; +import static org.assertj.core.api.Assertions.assertThat; + +public class GatewayRetryWithRetryPolicyTest { + private static final int TIMEOUT = 60000; + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void shouldRetryGenericRetryWithException() throws Exception { + GatewayRetryWithRetryPolicy retryPolicy = new GatewayRetryWithRetryPolicy( + createRequest(), + Mockito.mock(GlobalEndpointManager.class), + 30); + + CosmosException retryWithException = BridgeInternal.createCosmosException(HttpConstants.StatusCodes.RETRY_WITH); + + ShouldRetryResult shouldRetryResult = retryPolicy.shouldRetry(retryWithException).block(); + assertThat(shouldRetryResult.shouldRetry).isTrue(); + assertThat(shouldRetryResult.nonRelatedException).isFalse(); + assertThat(shouldRetryResult.policyArg.getValue0()).isFalse(); + assertThat(shouldRetryResult.policyArg.getValue1()).isTrue(); + assertThat(shouldRetryResult.policyArg.getValue3()).isEqualTo(1); + validateRetryWithTimeRange(10, shouldRetryResult, 5); + + shouldRetryResult = retryPolicy.shouldRetry(retryWithException).block(); + assertThat(shouldRetryResult.shouldRetry).isTrue(); + assertThat(shouldRetryResult.policyArg.getValue3()).isEqualTo(2); + validateRetryWithTimeRange(20, shouldRetryResult, 5); + } + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void shouldNotRetryGoneException() throws Exception { + GatewayRetryWithRetryPolicy retryPolicy = new GatewayRetryWithRetryPolicy( + createRequest(), + Mockito.mock(GlobalEndpointManager.class), + 30); + + CosmosException goneException = BridgeInternal.createCosmosException(HttpConstants.StatusCodes.GONE); + + ShouldRetryResult shouldRetryResult = retryPolicy.shouldRetry(goneException).block(); + assertThat(shouldRetryResult.shouldRetry).isFalse(); + assertThat(shouldRetryResult.nonRelatedException).isTrue(); + assertThat(shouldRetryResult.exception).isSameAs(goneException); + } + + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void shouldStopRetryingRetryWithAfterTimeout() throws Exception { + GatewayRetryWithRetryPolicy retryPolicy = new GatewayRetryWithRetryPolicy( + createRequest(), + Mockito.mock(GlobalEndpointManager.class), + 0); + + CosmosException retryWithException = BridgeInternal.createCosmosException(HttpConstants.StatusCodes.RETRY_WITH); + + ShouldRetryResult shouldRetryResult = retryPolicy.shouldRetry(retryWithException).block(); + assertThat(shouldRetryResult.shouldRetry).isFalse(); + assertThat(shouldRetryResult.exception).isSameAs(retryWithException); + assertThat(shouldRetryResult.nonRelatedException).isFalse(); + } + + private static RxDocumentServiceRequest createRequest() throws Exception { + DiagnosticsClientContext clientContext = mockDiagnosticsClientContext(); + RxDocumentServiceRequest request = RxDocumentServiceRequest.create( + clientContext, + OperationType.Read, + ResourceType.Document); + + request.requestContext.cosmosDiagnostics = clientContext.createDiagnostics(); + request.requestContext.regionalRoutingContextToRoute = new RegionalRoutingContext(new URI("https://localhost")); + return request; + } + + private static void validateRetryWithTimeRange( + int expectedDelayInMs, + ShouldRetryResult retryResult, + int saltValueInMs) { + + assertThat(retryResult.backOffTime.toMillis()).isGreaterThan(expectedDelayInMs - saltValueInMs); + assertThat(retryResult.backOffTime.toMillis()).isLessThan(expectedDelayInMs + saltValueInMs); + } +} \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index edf16806489c..62004bee9fa7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -596,6 +596,97 @@ public void nullAdditionalHeadersDoesNotAffectPerformRequest() throws Exception assertThat(headers.toMap().get(HttpConstants.HttpHeaders.WORKLOAD_ID)).isNull(); } + @Test(groups = "unit") + public void gatewayAddsNoRetry449Header() throws Exception { + DiagnosticsClientContext clientContext = mockDiagnosticsClientContext(); + ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); + GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + + Mockito.doReturn(new RegionalRoutingContext(new URI("https://localhost"))) + .when(globalEndpointManager).resolveServiceEndpoint(any()); + + HttpClient httpClient = Mockito.mock(HttpClient.class); + ArgumentCaptor httpClientRequestCaptor = ArgumentCaptor.forClass(HttpRequest.class); + Mockito.when(httpClient.send(any(), any())).thenReturn(Mono.error(new ConnectTimeoutException())); + + RxGatewayStoreModel storeModel = new RxGatewayStoreModel( + clientContext, + sessionContainer, + ConsistencyLevel.SESSION, + QueryCompatibilityMode.Default, + new UserAgentContainer(), + globalEndpointManager, + httpClient, + null, + null); + + RxDocumentServiceRequest request = RxDocumentServiceRequest.createFromName( + clientContext, + OperationType.Read, + "/dbs/db/colls/col/docs/doc1", + ResourceType.Document); + request.requestContext = new DocumentServiceRequestContext(); + request.requestContext.regionalRoutingContextToRoute = new RegionalRoutingContext(new URI("https://localhost")); + + try { + storeModel.performRequest(request).block(); + fail("Request should fail"); + } catch (Exception expectedException) { + // expected + } + + Mockito.verify(httpClient).send(httpClientRequestCaptor.capture(), any()); + HttpHeaders headers = ReflectionUtils.getHttpHeaders(httpClientRequestCaptor.getValue()); + assertThat(headers.toMap().get(HttpConstants.HttpHeaders.NO_RETRY_449)).isEqualTo("true"); + } + + @Test(groups = "unit") + public void gatewayOverridesNoRetry449Header() throws Exception { + DiagnosticsClientContext clientContext = mockDiagnosticsClientContext(); + ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); + GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + + Mockito.doReturn(new RegionalRoutingContext(new URI("https://localhost"))) + .when(globalEndpointManager).resolveServiceEndpoint(any()); + + HttpClient httpClient = Mockito.mock(HttpClient.class); + ArgumentCaptor httpClientRequestCaptor = ArgumentCaptor.forClass(HttpRequest.class); + Mockito.when(httpClient.send(any(), any())).thenReturn(Mono.error(new ConnectTimeoutException())); + + Map additionalHeaders = new HashMap<>(); + additionalHeaders.put(HttpConstants.HttpHeaders.NO_RETRY_449, "false"); + RxGatewayStoreModel storeModel = new RxGatewayStoreModel( + clientContext, + sessionContainer, + ConsistencyLevel.SESSION, + QueryCompatibilityMode.Default, + new UserAgentContainer(), + globalEndpointManager, + httpClient, + null, + additionalHeaders); + + RxDocumentServiceRequest request = RxDocumentServiceRequest.createFromName( + clientContext, + OperationType.Read, + "/dbs/db/colls/col/docs/doc1", + ResourceType.Document); + request.requestContext = new DocumentServiceRequestContext(); + request.requestContext.regionalRoutingContextToRoute = new RegionalRoutingContext(new URI("https://localhost")); + request.getHeaders().put(HttpConstants.HttpHeaders.NO_RETRY_449, "false"); + + try { + storeModel.performRequest(request).block(); + fail("Request should fail"); + } catch (Exception expectedException) { + // expected + } + + Mockito.verify(httpClient).send(httpClientRequestCaptor.capture(), any()); + HttpHeaders headers = ReflectionUtils.getHttpHeaders(httpClientRequestCaptor.getValue()); + assertThat(headers.toMap().get(HttpConstants.HttpHeaders.NO_RETRY_449)).isEqualTo("true"); + } + enum SessionTokenType { NONE, // no session token applied USER, // userControlled session token diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicyTest.java index 7c5c9be002bd..213a71ebff59 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicyTest.java @@ -19,8 +19,6 @@ import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.ShouldRetryResult; import com.azure.cosmos.implementation.guava25.base.Supplier; -import com.azure.cosmos.implementation.RetryWithException; -import org.mockito.Mockito; import org.testng.annotations.Test; import reactor.core.publisher.Mono; @@ -356,7 +354,7 @@ public void retryWithDefaultTimeouts() { ResourceType.Document); GoneAndRetryWithRetryPolicy goneAndRetryWithRetryPolicy = new GoneAndRetryWithRetryPolicy(request, 30); - RetryWithException retryWithException = Mockito.mock(RetryWithException.class); + RetryWithException retryWithException = new RetryWithException("Test", null, null); Mono singleShouldRetry = goneAndRetryWithRetryPolicy.shouldRetry(retryWithException); ShouldRetryResult shouldRetryResult = singleShouldRetry.block(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java new file mode 100644 index 000000000000..8e83dba5d9f0 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import com.azure.cosmos.BridgeInternal; +import reactor.core.publisher.Mono; + +import java.time.Duration; +import java.time.Instant; + +public class GatewayRetryWithRetryPolicy implements IRetryPolicy { + private final RetryWithRetryPolicy retryWithRetryPolicy; + private final MetadataRequestRetryPolicy metadataRequestRetryPolicy; + private final Instant start; + + public GatewayRetryWithRetryPolicy( + RxDocumentServiceRequest request, + GlobalEndpointManager globalEndpointManager, + Integer waitTimeInSeconds) { + + this.start = Instant.now(); + RetryContext retryContext = BridgeInternal.getRetryContext(request.requestContext.cosmosDiagnostics); + this.retryWithRetryPolicy = new RetryWithRetryPolicy( + waitTimeInSeconds, + retryContext, + () -> Duration.between(this.start, Instant.now()).toMillis(), + null); + this.metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(globalEndpointManager); + this.metadataRequestRetryPolicy.onBeforeSendRequest(request); + } + + @Override + public Mono shouldRetry(Exception exception) { + return this.retryWithRetryPolicy.shouldRetry(exception).flatMap(retryWithResult -> { + if (!retryWithResult.nonRelatedException) { + return Mono.just(retryWithResult); + } + + return this.metadataRequestRetryPolicy.shouldRetry(exception).map(metadataRequestRetryResult -> { + if (metadataRequestRetryResult.shouldRetry || metadataRequestRetryResult.nonRelatedException) { + return metadataRequestRetryResult; + } + + if (metadataRequestRetryResult.exception != null) { + return ShouldRetryResult.errorOnNonRelatedException(metadataRequestRetryResult.exception); + } + + return ShouldRetryResult.noRetryOnNonRelatedException(); + }); + }); + } + + @Override + public RetryContext getRetryContext() { + return this.retryWithRetryPolicy.getRetryContext(); + } +} \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java index 859acf9a748f..570bdb010f25 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java @@ -119,6 +119,7 @@ public static class HttpHeaders { public static final String REQUEST_VALIDATION_FAILURE = "x-ms-request-validation-failure"; public static final String WRITE_REQUEST_TRIGGER_ADDRESS_REFRESH = "x-ms-write-request-trigger-refresh"; + public static final String NO_RETRY_449 = "x-ms-noretry-449"; // Quota Info public static final String MAX_RESOURCE_QUOTA = "x-ms-resource-quota"; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryWithRetryPolicy.java new file mode 100644 index 000000000000..079985b48cda --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryWithRetryPolicy.java @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import com.azure.cosmos.CosmosException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Mono; + +import java.time.Duration; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.function.LongSupplier; + +public class RetryWithRetryPolicy implements IRetryPolicy { + private final static Logger logger = LoggerFactory.getLogger(RetryWithRetryPolicy.class); + private final static int DEFAULT_WAIT_TIME_IN_SECONDS = 30; + private final static int MAXIMUM_BACKOFF_TIME_IN_MS = 1000; + private final static int INITIAL_BACKOFF_TIME_MS = 10; + private final static int BACK_OFF_MULTIPLIER = 2; + private final static int RANDOM_SALT_IN_MS = 5; + + private final AtomicInteger attemptCount = new AtomicInteger(1); + private final AtomicInteger currentBackoffMilliseconds = + new AtomicInteger(RetryWithRetryPolicy.INITIAL_BACKOFF_TIME_MS); + private final int waitTimeInSeconds; + private final RetryContext retryContext; + private final LongSupplier elapsedTimeInMillisSupplier; + private final Consumer retryWithExceptionConsumer; + + public RetryWithRetryPolicy( + Integer waitTimeInSeconds, + RetryContext retryContext, + LongSupplier elapsedTimeInMillisSupplier, + Consumer retryWithExceptionConsumer) { + + this.waitTimeInSeconds = waitTimeInSeconds != null ? waitTimeInSeconds : DEFAULT_WAIT_TIME_IN_SECONDS; + this.retryContext = retryContext; + this.elapsedTimeInMillisSupplier = elapsedTimeInMillisSupplier; + this.retryWithExceptionConsumer = retryWithExceptionConsumer; + } + + @Override + public Mono shouldRetry(Exception exception) { + CosmosException cosmosException = Utils.as(exception, CosmosException.class); + if (cosmosException == null || cosmosException.getStatusCode() != HttpConstants.StatusCodes.RETRY_WITH) { + logger.debug("Operation will NOT be retried. Current attempt {}, Exception: ", this.attemptCount.get(), + exception); + return Mono.just(ShouldRetryResult.noRetryOnNonRelatedException()); + } + + if (this.retryWithExceptionConsumer != null) { + this.retryWithExceptionConsumer.accept(cosmosException); + } + + long remainingMilliseconds = + (this.waitTimeInSeconds * 1_000L) - this.elapsedTimeInMillisSupplier.getAsLong(); + int currentRetryAttemptCount = this.attemptCount.getAndIncrement(); + + if (remainingMilliseconds <= 0) { + logger.warn("Received RetryWith response after backoff/retry. Will fail the request.", cosmosException); + return Mono.just(ShouldRetryResult.error(cosmosException)); + } + + Duration backoffTime = Duration.ofMillis( + Math.min( + Math.min( + this.currentBackoffMilliseconds.get() + + ThreadLocalRandom.current().nextInt(RetryWithRetryPolicy.RANDOM_SALT_IN_MS), + remainingMilliseconds), + RetryWithRetryPolicy.MAXIMUM_BACKOFF_TIME_IN_MS)); + + this.currentBackoffMilliseconds.set( + Math.max( + RetryWithRetryPolicy.INITIAL_BACKOFF_TIME_MS, + Math.min( + RetryWithRetryPolicy.MAXIMUM_BACKOFF_TIME_IN_MS, + this.currentBackoffMilliseconds.get() * RetryWithRetryPolicy.BACK_OFF_MULTIPLIER)) + ); + + logger.debug("BackoffTime: {} ms.", backoffTime.toMillis()); + + long timeoutInMillSec = remainingMilliseconds - backoffTime.toMillis(); + Duration timeout = timeoutInMillSec > 0 ? Duration.ofMillis(timeoutInMillSec) + : Duration.ofMillis(RetryWithRetryPolicy.MAXIMUM_BACKOFF_TIME_IN_MS); + + logger.debug("Received RetryWith response, will retry, ", exception); + + return Mono.just(ShouldRetryResult.retryAfter(backoffTime, + Quadruple.with(false, true, timeout, currentRetryAttemptCount))); + } + + @Override + public RetryContext getRetryContext() { + return this.retryContext; + } +} \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 4afa44571ba0..24f27d42e18c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -73,6 +73,8 @@ private static ImplementationBridgeHelpers.CosmosExceptionHelper.CosmosException private static final boolean leakDetectionDebuggingEnabled = ResourceLeakDetector.getLevel().ordinal() >= ResourceLeakDetector.Level.ADVANCED.ordinal(); private static final boolean HTTP_CONNECTION_WITHOUT_TLS_ALLOWED = Configs.isHttpConnectionWithoutTLSAllowed(); + private static final int GATEWAY_RETRY_WITH_TIMEOUT_IN_SECONDS = 30; + private static final int STRONG_GATEWAY_RETRY_WITH_TIMEOUT_IN_SECONDS = 60; private static final List headersNeedToBeEscaped = Arrays.asList( HttpConstants.HttpHeaders.PARTITION_KEY, HttpConstants.HttpHeaders.POST_TRIGGER_EXCLUDE, @@ -298,6 +300,8 @@ public Mono performRequest(RxDocumentServiceRequest r } } + this.applyGatewayRetryWithHeaders(request); + URI uri = getUri(request); request.requestContext.resourcePhysicalAddress = uri.toString(); @@ -315,6 +319,10 @@ protected boolean partitionKeyRangeResolutionNeeded(RxDocumentServiceRequest req return false; } + protected void applyGatewayRetryWithHeaders(RxDocumentServiceRequest request) { + request.getHeaders().put(HttpConstants.HttpHeaders.NO_RETRY_449, "true"); + } + /** * Given the request it creates an flux which upon subscription issues HTTP call and emits one RxDocumentServiceResponse. * @@ -794,12 +802,28 @@ private Mono invokeAsyncInternal(RxDocumentServiceReq private Mono invokeAsync(RxDocumentServiceRequest request) { + if (request.requestContext.cosmosDiagnostics == null) { + request.requestContext.cosmosDiagnostics = clientContext.createDiagnostics(); + } + Callable> funcDelegate = () -> invokeAsyncInternal(request).single(); - MetadataRequestRetryPolicy metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(this.globalEndpointManager); - metadataRequestRetryPolicy.onBeforeSendRequest(request); + GatewayRetryWithRetryPolicy gatewayRetryWithRetryPolicy = new GatewayRetryWithRetryPolicy( + request, + this.globalEndpointManager, + this.getGatewayRetryWithTimeoutInSeconds()); + + return BackoffRetryUtility.executeRetry(funcDelegate, gatewayRetryWithRetryPolicy); + } + + private int getGatewayRetryWithTimeoutInSeconds() { + ConsistencyLevel effectiveConsistencyLevel = this.gatewayServiceConfigurationReader != null + ? this.gatewayServiceConfigurationReader.getDefaultConsistencyLevel() + : this.defaultConsistencyLevel; - return BackoffRetryUtility.executeRetry(funcDelegate, metadataRequestRetryPolicy); + return effectiveConsistencyLevel == ConsistencyLevel.STRONG + ? STRONG_GATEWAY_RETRY_WITH_TIMEOUT_IN_SECONDS + : GATEWAY_RETRY_WITH_TIMEOUT_IN_SECONDS; } @Override diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java index c90b6940fef1..3a849b4c5baa 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java @@ -78,6 +78,11 @@ public Mono processMessage(RxDocumentServiceRequest r return super.processMessage(request); } + @Override + protected void applyGatewayRetryWithHeaders(RxDocumentServiceRequest request) { + // ThinClient does not use the Gateway V1 server-side 449 retry loop. + } + @Override protected Map getDefaultHeaders( ApiType apiType, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java index 5bfe3d8078dd..1822baf02b23 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java @@ -16,6 +16,7 @@ import com.azure.cosmos.implementation.PartitionKeyRangeIsSplittingException; import com.azure.cosmos.implementation.Quadruple; import com.azure.cosmos.implementation.RetryContext; +import com.azure.cosmos.implementation.RetryWithRetryPolicy; import com.azure.cosmos.implementation.RetryWithException; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.ShouldRetryResult; @@ -27,7 +28,6 @@ import java.time.Duration; import java.time.Instant; -import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -46,7 +46,6 @@ private static ImplementationBridgeHelpers.CosmosExceptionHelper.CosmosException private volatile RetryWithException lastRetryWithException; private RetryContext retryContext; - private static final ThreadLocalRandom random = ThreadLocalRandom.current(); public GoneAndRetryWithRetryPolicy(RxDocumentServiceRequest request, Integer waitTimeInSeconds) { this.retryContext = BridgeInternal.getRetryContext(request.requestContext.cosmosDiagnostics); @@ -55,8 +54,16 @@ public GoneAndRetryWithRetryPolicy(RxDocumentServiceRequest request, Integer wai waitTimeInSeconds, this.retryContext ); - this.retryWithRetryPolicy = new RetryWithRetryPolicy(waitTimeInSeconds, this.retryContext); this.start = Instant.now(); + this.retryWithRetryPolicy = new RetryWithRetryPolicy( + waitTimeInSeconds, + this.retryContext, + () -> GoneAndRetryWithRetryPolicy.this.getElapsedTime().toMillis(), + cosmosException -> { + if (cosmosException instanceof RetryWithException) { + GoneAndRetryWithRetryPolicy.this.lastRetryWithException = (RetryWithException) cosmosException; + } + }); } @Override @@ -311,81 +318,4 @@ private Pair, Boolean> handlePartitionKeyIsSplittingExce } } - class RetryWithRetryPolicy implements IRetryPolicy { - private final static int DEFAULT_WAIT_TIME_IN_SECONDS = 30; - private final static int MAXIMUM_BACKOFF_TIME_IN_MS = 1000; - private final static int INITIAL_BACKOFF_TIME_MS = 10; - private final static int BACK_OFF_MULTIPLIER = 2; - private final static int RANDOM_SALT_IN_MS = 5; - - private final AtomicInteger attemptCount = new AtomicInteger(1); - private final AtomicInteger currentBackoffMilliseconds = new AtomicInteger(RetryWithRetryPolicy.INITIAL_BACKOFF_TIME_MS); - - private final int waitTimeInSeconds; - private final RetryContext retryContext; - - public RetryWithRetryPolicy(Integer waitTimeInSeconds, RetryContext retryContext) { - this.waitTimeInSeconds = waitTimeInSeconds != null ? waitTimeInSeconds : DEFAULT_WAIT_TIME_IN_SECONDS; - this.retryContext = retryContext; - } - - @Override - public Mono shouldRetry(Exception exception) { - Duration backoffTime; - Duration timeout; - - if (!(exception instanceof RetryWithException)) { - logger.debug("Operation will NOT be retried. Current attempt {}, Exception: ", this.attemptCount.get(), - exception); - return Mono.just(ShouldRetryResult.noRetryOnNonRelatedException()); - } - - RetryWithException lastRetryWithException = (RetryWithException)exception; - GoneAndRetryWithRetryPolicy.this.lastRetryWithException = lastRetryWithException; - - long remainingMilliseconds = - (this.waitTimeInSeconds * 1_000L) - - GoneAndRetryWithRetryPolicy.this.getElapsedTime().toMillis(); - int currentRetryAttemptCount = this.attemptCount.getAndIncrement(); - - if (remainingMilliseconds <= 0) { - logger.warn("Received RetryWithException after backoff/retry. Will fail the request.", - lastRetryWithException); - return Mono.just(ShouldRetryResult.error(lastRetryWithException)); - } - - backoffTime = Duration.ofMillis( - Math.min( - Math.min(this.currentBackoffMilliseconds.get() + random.nextInt(RANDOM_SALT_IN_MS), remainingMilliseconds), - RetryWithRetryPolicy.MAXIMUM_BACKOFF_TIME_IN_MS)); - - this.currentBackoffMilliseconds.set( - Math.max( - RetryWithRetryPolicy.INITIAL_BACKOFF_TIME_MS, - Math.min( - RetryWithRetryPolicy.MAXIMUM_BACKOFF_TIME_IN_MS, - this.currentBackoffMilliseconds.get() * RetryWithRetryPolicy.BACK_OFF_MULTIPLIER)) - ); - - logger.debug("BackoffTime: {} ms.", backoffTime.toMillis()); - - // Calculate the remaining time based after accounting for the backoff that we - // will perform - long timeoutInMillSec = remainingMilliseconds - backoffTime.toMillis(); - timeout = timeoutInMillSec > 0 ? Duration.ofMillis(timeoutInMillSec) - : Duration.ofMillis(RetryWithRetryPolicy.MAXIMUM_BACKOFF_TIME_IN_MS); - - logger.debug("Received RetryWithException, will retry, ", exception); - - // For RetryWithException, prevent the caller - // from refreshing any caches. - return Mono.just(ShouldRetryResult.retryAfter(backoffTime, - Quadruple.with(false, true, timeout, currentRetryAttemptCount))); - } - - @Override - public RetryContext getRetryContext() { - return this.retryContext; - } - } } From 4169fbcd2c30ddc6e954d8f4b9ee0db775277e7e Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 10:13:48 +0000 Subject: [PATCH 06/20] Honoring remaining timeout --- ...njectionServerErrorRuleOnGatewayTests.java | 18 ++++++++ ...ectionServerErrorRuleOnGatewayV2Tests.java | 18 ++++++++ .../cosmos/implementation/Exceptions.java | 4 ++ .../implementation/RxGatewayStoreModel.java | 42 +++++++++++++++++-- 4 files changed, 79 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java index 6d23fe732342..ffacb05d8a54 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java @@ -592,9 +592,27 @@ private void validateFaultInjectionRuleApplied( assertThat(gatewayStatistics.get("statusCode").asInt()).isEqualTo(statusCode); assertThat(gatewayStatistics.get("subStatusCode").asInt()).isEqualTo(subStatusCode); assertThat(gatewayStatistics.get("faultInjectionRuleId").asText()).isEqualTo(ruleId); + + if (canRetryOnFaultInjectedError && statusCode == HttpConstants.StatusCodes.RETRY_WITH) { + this.validateRetryWithGatewayStatistics(gatewayStatisticsList); + } } } + private void validateRetryWithGatewayStatistics(JsonNode gatewayStatisticsList) { + JsonNode retryGatewayStatistics = gatewayStatisticsList.get(1); + assertThat(retryGatewayStatistics).isNotNull(); + assertThat(retryGatewayStatistics.get("statusCode").asInt()) + .isNotEqualTo(HttpConstants.StatusCodes.RETRY_WITH); + + JsonNode retryResponseTimeout = retryGatewayStatistics.get("httpNetworkResponseTimeout"); + assertThat(retryResponseTimeout).isNotNull(); + + Duration retryTimeout = Duration.parse(retryResponseTimeout.asText()); + assertThat(retryTimeout).isGreaterThan(Duration.ofSeconds(10)); + assertThat(retryTimeout).isLessThanOrEqualTo(Duration.ofSeconds(30)); + } + private void validateNoFaultInjectionApplied( CosmosDiagnostics cosmosDiagnostics, OperationType operationType, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java index 53c373d1a101..5e0340b7a504 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java @@ -748,9 +748,27 @@ private void validateFaultInjectionRuleApplied( AssertionsForClassTypes.assertThat(gatewayStatistics.get("statusCode").asInt()).isEqualTo(statusCode); AssertionsForClassTypes.assertThat(gatewayStatistics.get("subStatusCode").asInt()).isEqualTo(subStatusCode); AssertionsForClassTypes.assertThat(gatewayStatistics.get("faultInjectionRuleId").asText()).isEqualTo(ruleId); + + if (canRetryOnFaultInjectedError && statusCode == HttpConstants.StatusCodes.RETRY_WITH) { + this.validateRetryWithGatewayStatistics(gatewayStatisticsList); + } } } + private void validateRetryWithGatewayStatistics(JsonNode gatewayStatisticsList) { + JsonNode retryGatewayStatistics = gatewayStatisticsList.get(1); + AssertionsForClassTypes.assertThat(retryGatewayStatistics).isNotNull(); + AssertionsForClassTypes.assertThat(retryGatewayStatistics.get("statusCode").asInt()) + .isNotEqualTo(HttpConstants.StatusCodes.RETRY_WITH); + + JsonNode retryResponseTimeout = retryGatewayStatistics.get("httpNetworkResponseTimeout"); + AssertionsForClassTypes.assertThat(retryResponseTimeout).isNotNull(); + + Duration retryTimeout = Duration.parse(retryResponseTimeout.asText()); + AssertionsForClassTypes.assertThat(retryTimeout).isGreaterThan(Duration.ofSeconds(10)); + AssertionsForClassTypes.assertThat(retryTimeout).isLessThanOrEqualTo(Duration.ofSeconds(30)); + } + private static AccountLevelLocationContext getAccountLevelLocationContext(DatabaseAccount databaseAccount, boolean writeOnly) { Iterator locationIterator = writeOnly ? databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Exceptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Exceptions.java index 3bc37ff48d2c..b29c7651b631 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Exceptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Exceptions.java @@ -94,6 +94,10 @@ public static boolean isCommonlyExpectedExceptionPossiblyCausingNoisyLogs(int st return true; } + if (statusCode == HttpConstants.StatusCodes.RETRY_WITH) { + return true; + } + return false; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 24f27d42e18c..e2431570e0c6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -46,6 +46,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; +import java.time.Duration; import java.time.Instant; import java.util.Arrays; import java.util.HashMap; @@ -53,8 +54,8 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Objects; -import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; import static com.azure.cosmos.implementation.HttpConstants.HttpHeaders.INTENDED_COLLECTION_RID_HEADER; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -806,14 +807,49 @@ private Mono invokeAsync(RxDocumentServiceRequest req request.requestContext.cosmosDiagnostics = clientContext.createDiagnostics(); } - Callable> funcDelegate = () -> invokeAsyncInternal(request).single(); + Function, Mono> funcDelegate = + retryPolicyArg -> { + this.applyGatewayRetryPolicyArg(request, retryPolicyArg); + return invokeAsyncInternal(request).single(); + }; GatewayRetryWithRetryPolicy gatewayRetryWithRetryPolicy = new GatewayRetryWithRetryPolicy( request, this.globalEndpointManager, this.getGatewayRetryWithTimeoutInSeconds()); - return BackoffRetryUtility.executeRetry(funcDelegate, gatewayRetryWithRetryPolicy); + return BackoffRetryUtility.executeAsync( + funcDelegate, + gatewayRetryWithRetryPolicy, + null, + Duration.ZERO, + request, + null); + } + + private void applyGatewayRetryPolicyArg( + RxDocumentServiceRequest request, + Quadruple retryPolicyArg) { + + if (retryPolicyArg == null || !Boolean.TRUE.equals(retryPolicyArg.getValue1())) { + return; + } + + Duration remainingTime = retryPolicyArg.getValue2(); + Integer retryAttemptCount = retryPolicyArg.getValue3(); + + if (remainingTime != null) { + request.setResponseTimeout(remainingTime); + request.getHeaders().put( + HttpConstants.HttpHeaders.REMAINING_TIME_IN_MS_ON_CLIENT_REQUEST, + Long.toString(remainingTime.toMillis())); + } + + if (retryAttemptCount != null) { + request.getHeaders().put( + HttpConstants.HttpHeaders.CLIENT_RETRY_ATTEMPT_COUNT, + retryAttemptCount.toString()); + } } private int getGatewayRetryWithTimeoutInSeconds() { From 4487103eb60fd5c757a550b0aadbf91c59156061 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 10:23:17 +0000 Subject: [PATCH 07/20] Update CHANGELOG.md --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 5180cd9a9b71..0f4b4f473312 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -13,6 +13,7 @@ #### Other Changes * Replaced per-client `Schedulers.newSingle()` schedulers in `GlobalEndpointManager` and `GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker` with shared `BoundedElastic` schedulers in `CosmosSchedulers` to prevent thread count from scaling linearly with client/tenant count. - See [PR 49062](https://github.com/Azure/azure-sdk-for-java/pull/49062) * Fixed a sporadic `NullPointerException` in `JsonSerializable.getWithMapping` triggered by concurrent first-time calls to `DatabaseAccount.getConsistencyPolicy()` and its sibling lazy getters (`getReplicationPolicy`, `getSystemReplicationPolicy`, `getQueryEngineConfiguration`). The fix makes `JsonSerializable.propertyBag` `final`, closing an unsafe-publication race in the lazy-initialisation pattern. - See [Issue 49256](https://github.com/Azure/azure-sdk-for-java/issues/49256) and [PR #49258](https://github.com/Azure/azure-sdk-for-java/pull/49258) +* Changed 449 (`Retry With`) retries in Gateway V1 and Gateway V2 to be consistently orchestrated client-side. - See [PR 49332](https://github.com/Azure/azure-sdk-for-java/pull/49332) ### 4.80.0 (2026-05-01) From 3cdbaa7107eac69efdc36330173e8eb882b1f5aa Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 12:33:31 +0200 Subject: [PATCH 08/20] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../GatewayRetryWithRetryPolicy.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java index 8e83dba5d9f0..4fdc4d159593 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java @@ -30,6 +30,7 @@ public GatewayRetryWithRetryPolicy( this.metadataRequestRetryPolicy.onBeforeSendRequest(request); } + @Override @Override public Mono shouldRetry(Exception exception) { return this.retryWithRetryPolicy.shouldRetry(exception).flatMap(retryWithResult -> { @@ -37,19 +38,10 @@ public Mono shouldRetry(Exception exception) { return Mono.just(retryWithResult); } - return this.metadataRequestRetryPolicy.shouldRetry(exception).map(metadataRequestRetryResult -> { - if (metadataRequestRetryResult.shouldRetry || metadataRequestRetryResult.nonRelatedException) { - return metadataRequestRetryResult; - } - - if (metadataRequestRetryResult.exception != null) { - return ShouldRetryResult.errorOnNonRelatedException(metadataRequestRetryResult.exception); - } - - return ShouldRetryResult.noRetryOnNonRelatedException(); - }); + return this.metadataRequestRetryPolicy.shouldRetry(exception); }); } + } @Override public RetryContext getRetryContext() { From 25424914e4e19bd8d0e585d9fc1801e51d3ac094 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 10:55:52 +0000 Subject: [PATCH 09/20] Update GatewayRetryWithRetryPolicy.java --- .../cosmos/implementation/GatewayRetryWithRetryPolicy.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java index 4fdc4d159593..bb22a2e78540 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java @@ -30,7 +30,6 @@ public GatewayRetryWithRetryPolicy( this.metadataRequestRetryPolicy.onBeforeSendRequest(request); } - @Override @Override public Mono shouldRetry(Exception exception) { return this.retryWithRetryPolicy.shouldRetry(exception).flatMap(retryWithResult -> { @@ -41,7 +40,6 @@ public Mono shouldRetry(Exception exception) { return this.metadataRequestRetryPolicy.shouldRetry(exception); }); } - } @Override public RetryContext getRetryContext() { From ff303116460dec6a0583162a7378c424d6b02e4c Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 19:17:34 +0000 Subject: [PATCH 10/20] Fixing test issue --- .../implementation/GatewayRetryWithRetryPolicyTest.java | 4 ++-- .../cosmos/implementation/GatewayRetryWithRetryPolicy.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java index a446a22b9af4..94f3d4774a1e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java @@ -51,7 +51,7 @@ public void shouldNotRetryGoneException() throws Exception { ShouldRetryResult shouldRetryResult = retryPolicy.shouldRetry(goneException).block(); assertThat(shouldRetryResult.shouldRetry).isFalse(); - assertThat(shouldRetryResult.nonRelatedException).isTrue(); + assertThat(shouldRetryResult.nonRelatedException).isFalse(); assertThat(shouldRetryResult.exception).isSameAs(goneException); } @@ -90,4 +90,4 @@ private static void validateRetryWithTimeRange( assertThat(retryResult.backOffTime.toMillis()).isGreaterThan(expectedDelayInMs - saltValueInMs); assertThat(retryResult.backOffTime.toMillis()).isLessThan(expectedDelayInMs + saltValueInMs); } -} \ No newline at end of file +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java index bb22a2e78540..7e2bff21635a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java @@ -45,4 +45,4 @@ public Mono shouldRetry(Exception exception) { public RetryContext getRetryContext() { return this.retryWithRetryPolicy.getRetryContext(); } -} \ No newline at end of file +} From 9c1e925cdfd3e8422a545e0617def2b4691d878b Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 21:30:39 +0000 Subject: [PATCH 11/20] Update GatewayRetryWithRetryPolicyTest.java --- .../GatewayRetryWithRetryPolicyTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java index 94f3d4774a1e..31e4f0e2989a 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java @@ -6,6 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosException; import com.azure.cosmos.implementation.routing.RegionalRoutingContext; +import io.netty.channel.ConnectTimeoutException; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -55,6 +56,20 @@ public void shouldNotRetryGoneException() throws Exception { assertThat(shouldRetryResult.exception).isSameAs(goneException); } + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void shouldDelegateRetryableNetworkExceptionToMetadataPolicy() throws Exception { + GatewayRetryWithRetryPolicy retryPolicy = new GatewayRetryWithRetryPolicy( + createRequest(), + Mockito.mock(GlobalEndpointManager.class), + 30); + + ShouldRetryResult shouldRetryResult = retryPolicy.shouldRetry(new ConnectTimeoutException()).block(); + assertThat(shouldRetryResult.shouldRetry).isTrue(); + assertThat(shouldRetryResult.nonRelatedException).isFalse(); + assertThat(shouldRetryResult.exception).isNull(); + assertThat(shouldRetryResult.backOffTime).isNotNull(); + } + @Test(groups = { "unit" }, timeOut = TIMEOUT) public void shouldStopRetryingRetryWithAfterTimeout() throws Exception { GatewayRetryWithRetryPolicy retryPolicy = new GatewayRetryWithRetryPolicy( From d623688a58ce360974df966c5aa46ba84d9793ea Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 21:41:42 +0000 Subject: [PATCH 12/20] Adding additional tests --- .../RxGatewayStoreModelTest.java | 49 ++++++++++++++ .../ThinClientStoreModelTest.java | 64 +++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index 62004bee9fa7..9d67473cced5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -24,6 +24,7 @@ import reactor.core.publisher.Sinks; import reactor.test.StepVerifier; +import java.lang.reflect.Method; import java.net.SocketException; import java.net.URI; import java.time.Duration; @@ -687,6 +688,54 @@ public void gatewayOverridesNoRetry449Header() throws Exception { assertThat(headers.toMap().get(HttpConstants.HttpHeaders.NO_RETRY_449)).isEqualTo("true"); } + @Test(groups = "unit") + public void gatewayRetryWithTimeoutUsesStrongConsistencyFromGatewayServiceConfigurationReader() throws Exception { + DiagnosticsClientContext clientContext = mockDiagnosticsClientContext(); + GatewayServiceConfigurationReader gatewayServiceConfigurationReader = Mockito.mock(GatewayServiceConfigurationReader.class); + Mockito.doReturn(ConsistencyLevel.STRONG) + .when(gatewayServiceConfigurationReader).getDefaultConsistencyLevel(); + + RxGatewayStoreModel storeModel = new RxGatewayStoreModel( + clientContext, + Mockito.mock(ISessionContainer.class), + ConsistencyLevel.SESSION, + QueryCompatibilityMode.Default, + new UserAgentContainer(), + Mockito.mock(GlobalEndpointManager.class), + Mockito.mock(HttpClient.class), + null, + null); + storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); + + assertThat(getGatewayRetryWithTimeoutInSeconds(storeModel)).isEqualTo(60); + } + + @Test(groups = "unit") + public void gatewayRetryWithTimeoutFallsBackToDefaultConsistencyWhenGatewayServiceConfigurationReaderIsNull() + throws Exception { + + RxGatewayStoreModel storeModel = new RxGatewayStoreModel( + mockDiagnosticsClientContext(), + Mockito.mock(ISessionContainer.class), + ConsistencyLevel.STRONG, + QueryCompatibilityMode.Default, + new UserAgentContainer(), + Mockito.mock(GlobalEndpointManager.class), + Mockito.mock(HttpClient.class), + null, + null); + + assertThat(getGatewayRetryWithTimeoutInSeconds(storeModel)).isEqualTo(60); + } + + private static int getGatewayRetryWithTimeoutInSeconds(RxGatewayStoreModel storeModel) throws Exception { + Method getGatewayRetryWithTimeoutInSeconds = RxGatewayStoreModel.class + .getDeclaredMethod("getGatewayRetryWithTimeoutInSeconds"); + getGatewayRetryWithTimeoutInSeconds.setAccessible(true); + + return (int) getGatewayRetryWithTimeoutInSeconds.invoke(storeModel); + } + enum SessionTokenType { NONE, // no session token applied USER, // userControlled session token diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java index ec6315f209b2..f657036d81f5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java @@ -1,7 +1,9 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.http.HttpClient; +import com.azure.cosmos.implementation.http.HttpHeaders; import com.azure.cosmos.implementation.http.HttpRequest; import com.azure.cosmos.implementation.routing.PartitionKeyInternal; import com.azure.cosmos.implementation.routing.RegionalRoutingContext; @@ -190,6 +192,68 @@ public void testNullAdditionalHeadersThinClientStoreModel() throws Exception { .isNull(); } + @Test(groups = "unit") + public void thinClientDoesNotAddNoRetry449Header() throws Exception { + DiagnosticsClientContext clientContext = Mockito.mock(DiagnosticsClientContext.class); + Mockito.doReturn(new DiagnosticsClientContext.DiagnosticsClientConfig()).when(clientContext).getConfig(); + Mockito + .doReturn(ImplementationBridgeHelpers + .CosmosDiagnosticsHelper + .getCosmosDiagnosticsAccessor() + .create(clientContext, 1d)) + .when(clientContext).createDiagnostics(); + + ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); + Mockito.doReturn("1#100#1=20#2=5#3=30").when(sessionContainer).resolveGlobalSessionToken(any()); + + RegionalRoutingContext regionalRoutingContext = new RegionalRoutingContext(new URI("https://localhost:8080")); + regionalRoutingContext.setThinclientRegionalEndpoint(new URI("https://localhost:8081")); + GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + Mockito.doReturn(regionalRoutingContext).when(globalEndpointManager).resolveServiceEndpoint(any()); + DatabaseAccount databaseAccount = Mockito.mock(DatabaseAccount.class); + Mockito.doReturn("test-account").when(databaseAccount).getId(); + Mockito.doReturn(databaseAccount).when(globalEndpointManager).getLatestDatabaseAccount(); + + HttpClient httpClient = Mockito.mock(HttpClient.class); + ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(HttpRequest.class); + Mockito.when(httpClient.send(requestCaptor.capture(), any())) + .thenReturn(Mono.error(new ConnectTimeoutException())); + + ThinClientStoreModel storeModel = new ThinClientStoreModel( + clientContext, + sessionContainer, + ConsistencyLevel.SESSION, + new UserAgentContainer(), + globalEndpointManager, + httpClient, + null); + + RxDocumentServiceRequest request = RxDocumentServiceRequest.createFromName( + clientContext, + OperationType.Read, + "/dbs/db/colls/col/docs/doc1", + ResourceType.Document); + PartitionKeyDefinition partitionKeyDefinition = new PartitionKeyDefinition(); + partitionKeyDefinition.setPaths(Collections.singletonList("/partitionKey")); + request.setPartitionKeyDefinition(partitionKeyDefinition); + request.setPartitionKeyInternal(PartitionKeyInternal.fromObjectArray(Collections.singletonList("testPk"), true)); + + try { + storeModel.performRequest(request).block(); + } catch (Exception e) { + // Expected - mock HTTP client throws ConnectTimeoutException + } + + assertThat(request.getHeaders().get(HttpConstants.HttpHeaders.NO_RETRY_449)) + .as("ThinClient request headers should not include the Gateway V1 no-retry-449 header") + .isNull(); + + HttpHeaders httpHeaders = ReflectionUtils.getHttpHeaders(requestCaptor.getValue()); + assertThat(httpHeaders.toMap().get(HttpConstants.HttpHeaders.NO_RETRY_449)) + .as("ThinClient HTTP framing headers should not include the Gateway V1 no-retry-449 header") + .isNull(); + } + @Test(groups = "unit") public void cloneShouldPreservePartitionKeyDefinition() { DiagnosticsClientContext clientContext = Mockito.mock(DiagnosticsClientContext.class); From e303df559f8037b4808346f494e89485d0815d0a Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 1 Jun 2026 23:35:19 +0000 Subject: [PATCH 13/20] Fixing test issues --- .../FaultInjectionServerErrorRuleOnGatewayTests.java | 12 +++++++++++- ...aultInjectionServerErrorRuleOnGatewayV2Tests.java | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java index ffacb05d8a54..584a0f02b6d1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java @@ -4,6 +4,7 @@ package com.azure.cosmos.faultinjection; import com.azure.cosmos.BridgeInternal; +import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosAsyncContainer; import com.azure.cosmos.CosmosClientBuilder; @@ -610,7 +611,16 @@ private void validateRetryWithGatewayStatistics(JsonNode gatewayStatisticsList) Duration retryTimeout = Duration.parse(retryResponseTimeout.asText()); assertThat(retryTimeout).isGreaterThan(Duration.ofSeconds(10)); - assertThat(retryTimeout).isLessThanOrEqualTo(Duration.ofSeconds(30)); + assertThat(retryTimeout).isLessThanOrEqualTo(this.getMaxRetryWithTimeout()); + } + + private Duration getMaxRetryWithTimeout() { + AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); + ConsistencyLevel consistencyLevel = asyncDocumentClient.getConsistencyLevel() != null + ? asyncDocumentClient.getConsistencyLevel() + : asyncDocumentClient.getDefaultConsistencyLevelOfAccount(); + + return consistencyLevel == ConsistencyLevel.STRONG ? Duration.ofSeconds(60) : Duration.ofSeconds(30); } private void validateNoFaultInjectionApplied( diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java index 5e0340b7a504..374727d338e5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java @@ -4,6 +4,7 @@ package com.azure.cosmos.faultinjection; import com.azure.cosmos.BridgeInternal; +import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosAsyncContainer; import com.azure.cosmos.CosmosClientBuilder; @@ -766,7 +767,16 @@ private void validateRetryWithGatewayStatistics(JsonNode gatewayStatisticsList) Duration retryTimeout = Duration.parse(retryResponseTimeout.asText()); AssertionsForClassTypes.assertThat(retryTimeout).isGreaterThan(Duration.ofSeconds(10)); - AssertionsForClassTypes.assertThat(retryTimeout).isLessThanOrEqualTo(Duration.ofSeconds(30)); + AssertionsForClassTypes.assertThat(retryTimeout).isLessThanOrEqualTo(this.getMaxRetryWithTimeout()); + } + + private Duration getMaxRetryWithTimeout() { + AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); + ConsistencyLevel consistencyLevel = asyncDocumentClient.getConsistencyLevel() != null + ? asyncDocumentClient.getConsistencyLevel() + : asyncDocumentClient.getDefaultConsistencyLevelOfAccount(); + + return consistencyLevel == ConsistencyLevel.STRONG ? Duration.ofSeconds(60) : Duration.ofSeconds(30); } private static AccountLevelLocationContext getAccountLevelLocationContext(DatabaseAccount databaseAccount, boolean writeOnly) { From 1c4723665c60d3eed3c1d6c7b4dc37a618a3153a Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 2 Jun 2026 01:54:42 +0000 Subject: [PATCH 14/20] Fixing tests --- .../GatewayRetryWithRetryPolicyTest.java | 2 +- .../implementation/GatewayRetryWithRetryPolicy.java | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java index 31e4f0e2989a..70c56e76de33 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java @@ -52,7 +52,7 @@ public void shouldNotRetryGoneException() throws Exception { ShouldRetryResult shouldRetryResult = retryPolicy.shouldRetry(goneException).block(); assertThat(shouldRetryResult.shouldRetry).isFalse(); - assertThat(shouldRetryResult.nonRelatedException).isFalse(); + assertThat(shouldRetryResult.nonRelatedException).isTrue(); assertThat(shouldRetryResult.exception).isSameAs(goneException); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java index 7e2bff21635a..a0573b4ab2b9 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicy.java @@ -37,7 +37,17 @@ public Mono shouldRetry(Exception exception) { return Mono.just(retryWithResult); } - return this.metadataRequestRetryPolicy.shouldRetry(exception); + return this.metadataRequestRetryPolicy.shouldRetry(exception).map(metadataRequestRetryResult -> { + if (metadataRequestRetryResult.shouldRetry || metadataRequestRetryResult.nonRelatedException) { + return metadataRequestRetryResult; + } + + if (metadataRequestRetryResult.exception != null) { + return ShouldRetryResult.errorOnNonRelatedException(metadataRequestRetryResult.exception); + } + + return ShouldRetryResult.noRetryOnNonRelatedException(); + }); }); } From f4769f6fdf1a4e644c52638b60d65de40d652e33 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 2 Jun 2026 01:56:31 +0000 Subject: [PATCH 15/20] Update GatewayRetryWithRetryPolicyTest.java --- .../GatewayRetryWithRetryPolicyTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java index 70c56e76de33..46e0ef24e9e5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GatewayRetryWithRetryPolicyTest.java @@ -56,6 +56,21 @@ public void shouldNotRetryGoneException() throws Exception { assertThat(shouldRetryResult.exception).isSameAs(goneException); } + @Test(groups = { "unit" }, timeOut = TIMEOUT) + public void shouldNotHandleThrottlingException() throws Exception { + GatewayRetryWithRetryPolicy retryPolicy = new GatewayRetryWithRetryPolicy( + createRequest(), + Mockito.mock(GlobalEndpointManager.class), + 30); + + CosmosException throttlingException = BridgeInternal.createCosmosException(HttpConstants.StatusCodes.TOO_MANY_REQUESTS); + + ShouldRetryResult shouldRetryResult = retryPolicy.shouldRetry(throttlingException).block(); + assertThat(shouldRetryResult.shouldRetry).isFalse(); + assertThat(shouldRetryResult.nonRelatedException).isTrue(); + assertThat(shouldRetryResult.exception).isSameAs(throttlingException); + } + @Test(groups = { "unit" }, timeOut = TIMEOUT) public void shouldDelegateRetryableNetworkExceptionToMetadataPolicy() throws Exception { GatewayRetryWithRetryPolicy retryPolicy = new GatewayRetryWithRetryPolicy( From d2786aa92a699ce19d8c5c23e7b665ce9d3cdbc1 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 2 Jun 2026 09:11:15 +0000 Subject: [PATCH 16/20] Fixes Spring Boot AAD tests --- ...ttpRequestFactoryBuilderConfiguration.java | 40 +++++++++++++++ .../AadResourceServerConfigurationTests.java | 34 ++++++++----- ...cResourceServerAutoConfigurationTests.java | 49 +++++++++++-------- 3 files changed, 92 insertions(+), 31 deletions(-) create mode 100644 sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java diff --git a/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java new file mode 100644 index 000000000000..6eecc15c6f19 --- /dev/null +++ b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.spring.cloud.autoconfigure.implementation.aad; + +import org.springframework.boot.http.client.ClientHttpRequestFactoryBuilder; +import org.springframework.boot.http.client.HttpClientSettings; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.http.client.ClientHttpRequestFactory; + +@Configuration(proxyBeanMethods = false) +public class RecordingClientHttpRequestFactoryBuilderConfiguration { + + @Bean + public RecordingClientHttpRequestFactoryBuilder recordingClientHttpRequestFactoryBuilder() { + return new RecordingClientHttpRequestFactoryBuilder(ClientHttpRequestFactoryBuilder.detect()); + } + + public static final class RecordingClientHttpRequestFactoryBuilder + implements ClientHttpRequestFactoryBuilder { + + private final ClientHttpRequestFactoryBuilder delegate; + private HttpClientSettings clientSettings; + + RecordingClientHttpRequestFactoryBuilder( + ClientHttpRequestFactoryBuilder delegate) { + this.delegate = delegate; + } + + @Override + public ClientHttpRequestFactory build(HttpClientSettings settings) { + this.clientSettings = settings; + return this.delegate.build(settings); + } + + public HttpClientSettings getClientSettings() { + return this.clientSettings; + } + } +} \ No newline at end of file diff --git a/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/configuration/AadResourceServerConfigurationTests.java b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/configuration/AadResourceServerConfigurationTests.java index bb8cc538e012..158a8602ca66 100644 --- a/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/configuration/AadResourceServerConfigurationTests.java +++ b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/configuration/AadResourceServerConfigurationTests.java @@ -3,20 +3,24 @@ package com.azure.spring.cloud.autoconfigure.implementation.aad.configuration; import com.azure.identity.extensions.implementation.template.AzureAuthenticationTemplate; +import com.azure.spring.cloud.autoconfigure.implementation.aad.RecordingClientHttpRequestFactoryBuilderConfiguration; +import com.azure.spring.cloud.autoconfigure.implementation.aad.RecordingClientHttpRequestFactoryBuilderConfiguration.RecordingClientHttpRequestFactoryBuilder; import com.azure.spring.cloud.autoconfigure.implementation.aad.configuration.properties.AadAuthenticationProperties; -import com.azure.spring.cloud.autoconfigure.implementation.aad.security.jwt.AadJwtIssuerValidator; import com.azure.spring.cloud.autoconfigure.implementation.aad.security.AadResourceServerHttpSecurityConfigurer; +import com.azure.spring.cloud.autoconfigure.implementation.aad.security.jwt.AadJwtIssuerValidator; import com.azure.spring.cloud.autoconfigure.implementation.context.AzureGlobalPropertiesAutoConfiguration; import com.nimbusds.jose.jwk.source.JWKSourceBuilder; import com.nimbusds.jwt.proc.JWTClaimsSetAwareJWSKeySelector; import org.junit.jupiter.api.Test; import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.http.client.HttpClientSettings; import org.springframework.boot.http.converter.autoconfigure.HttpMessageConvertersAutoConfiguration; import org.springframework.boot.autoconfigure.logging.ConditionEvaluationReportLoggingListener; import org.springframework.boot.restclient.autoconfigure.RestTemplateAutoConfiguration; import org.springframework.boot.logging.LogLevel; import org.springframework.boot.test.context.FilteredClassLoader; import org.springframework.boot.test.context.runner.WebApplicationContextRunner; +import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.core.convert.converter.Converter; import org.springframework.security.config.annotation.method.configuration.EnableMethodSecurity; @@ -72,6 +76,7 @@ void testCreateJwtDecoderByJwkKeySetUri() { @Test void testJwtDecoderTimeoutDefaultValues() { resourceServerContextRunner() + .withUserConfiguration(RecordingClientHttpRequestFactoryBuilderConfiguration.class) .withPropertyValues("spring.cloud.azure.active-directory.enabled=true") .run(context -> { AadAuthenticationProperties properties = context.getBean(AadAuthenticationProperties.class); @@ -81,7 +86,7 @@ void testJwtDecoderTimeoutDefaultValues() { .isEqualTo(Duration.ofMillis(JWKSourceBuilder.DEFAULT_HTTP_READ_TIMEOUT)); // Verify the default timeouts are applied to the RestTemplate used by the JwtDecoder final JwtDecoder jwtDecoder = context.getBean(JwtDecoder.class); - verifyJwtDecoderRestTemplateTimeouts(jwtDecoder, + verifyJwtDecoderRestTemplateTimeouts(context, jwtDecoder, JWKSourceBuilder.DEFAULT_HTTP_CONNECT_TIMEOUT, JWKSourceBuilder.DEFAULT_HTTP_READ_TIMEOUT); }); @@ -90,6 +95,7 @@ void testJwtDecoderTimeoutDefaultValues() { @Test void testJwtDecoderTimeoutCustomValues() { resourceServerContextRunner() + .withUserConfiguration(RecordingClientHttpRequestFactoryBuilderConfiguration.class) .withPropertyValues( "spring.cloud.azure.active-directory.enabled=true", "spring.cloud.azure.active-directory.jwt-connect-timeout=2000", @@ -103,7 +109,7 @@ void testJwtDecoderTimeoutCustomValues() { assertThat(jwtDecoder).isNotNull(); assertThat(jwtDecoder).isExactlyInstanceOf(NimbusJwtDecoder.class); // Verify the configured timeouts are applied to the RestTemplate used by the JwtDecoder - verifyJwtDecoderRestTemplateTimeouts(jwtDecoder, 2000, 3000); + verifyJwtDecoderRestTemplateTimeouts(context, jwtDecoder, 2000, 3000); }); } @@ -410,7 +416,8 @@ public Collection convert(Jwt source) { * has the expected connect and read timeouts applied to its ClientHttpRequestFactory. */ @SuppressWarnings("unchecked") - private static void verifyJwtDecoderRestTemplateTimeouts(JwtDecoder jwtDecoder, + private static void verifyJwtDecoderRestTemplateTimeouts(ApplicationContext context, + JwtDecoder jwtDecoder, int expectedConnectTimeoutMs, int expectedReadTimeoutMs) { // NimbusJwtDecoder -> jwtProcessor (DefaultJWTProcessor) @@ -443,13 +450,18 @@ private static void verifyJwtDecoderRestTemplateTimeouts(JwtDecoder jwtDecoder, // RestTemplate -> ClientHttpRequestFactory org.springframework.http.client.ClientHttpRequestFactory requestFactory = ((org.springframework.web.client.RestTemplate) restOperations).getRequestFactory(); + assertThat(requestFactory).isNotNull(); + + assertRecordedHttpClientSettings(context, expectedConnectTimeoutMs, expectedReadTimeoutMs); + } - // Verify timeouts on the request factory (may be stored as Duration or int) - Object connectTimeoutValue = ReflectionTestUtils.getField(requestFactory, "connectTimeout"); - Object readTimeoutValue = ReflectionTestUtils.getField(requestFactory, "readTimeout"); - int connectTimeout = connectTimeoutValue instanceof java.time.Duration d ? (int) d.toMillis() : (int) connectTimeoutValue; - int readTimeout = readTimeoutValue instanceof java.time.Duration d ? (int) d.toMillis() : (int) readTimeoutValue; - assertThat(connectTimeout).isEqualTo(expectedConnectTimeoutMs); - assertThat(readTimeout).isEqualTo(expectedReadTimeoutMs); + private static void assertRecordedHttpClientSettings(ApplicationContext context, + int expectedConnectTimeoutMs, + int expectedReadTimeoutMs) { + HttpClientSettings clientSettings = context.getBean(RecordingClientHttpRequestFactoryBuilder.class) + .getClientSettings(); + assertThat(clientSettings).isNotNull(); + assertThat(clientSettings.connectTimeout()).isEqualTo(Duration.ofMillis(expectedConnectTimeoutMs)); + assertThat(clientSettings.readTimeout()).isEqualTo(Duration.ofMillis(expectedReadTimeoutMs)); } } diff --git a/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aadb2c/configuration/AadB2cResourceServerAutoConfigurationTests.java b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aadb2c/configuration/AadB2cResourceServerAutoConfigurationTests.java index 517abd3b0e0f..cd100b48b9d9 100644 --- a/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aadb2c/configuration/AadB2cResourceServerAutoConfigurationTests.java +++ b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aadb2c/configuration/AadB2cResourceServerAutoConfigurationTests.java @@ -3,6 +3,8 @@ package com.azure.spring.cloud.autoconfigure.implementation.aadb2c.configuration; import com.azure.spring.cloud.autoconfigure.implementation.aadb2c.AadB2cConstants; +import com.azure.spring.cloud.autoconfigure.implementation.aad.RecordingClientHttpRequestFactoryBuilderConfiguration; +import com.azure.spring.cloud.autoconfigure.implementation.aad.RecordingClientHttpRequestFactoryBuilderConfiguration.RecordingClientHttpRequestFactoryBuilder; import com.azure.spring.cloud.autoconfigure.implementation.aad.security.jwt.AadIssuerJwsKeySelector; import com.azure.spring.cloud.autoconfigure.implementation.aad.security.jwt.AadTrustedIssuerRepository; import com.azure.spring.cloud.autoconfigure.implementation.aadb2c.configuration.conditions.AadB2cConditions; @@ -20,6 +22,7 @@ import org.mockito.Mockito; import org.springframework.beans.BeanUtils; import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.http.client.HttpClientSettings; import org.springframework.boot.http.converter.autoconfigure.HttpMessageConvertersAutoConfiguration; import org.springframework.boot.restclient.autoconfigure.RestTemplateAutoConfiguration; import org.springframework.boot.test.context.FilteredClassLoader; @@ -145,22 +148,25 @@ void testB2COnlyResourceServerBean() { @Test void testB2CTimeoutDefaultValues() { - getResourceServerContextRunner().run(context -> { - AadB2cProperties properties = context.getBean(AadB2cProperties.class); - assertThat(properties.getJwtConnectTimeout()) - .isEqualTo(Duration.ofMillis(JWKSourceBuilder.DEFAULT_HTTP_CONNECT_TIMEOUT)); - assertThat(properties.getJwtReadTimeout()) - .isEqualTo(Duration.ofMillis(JWKSourceBuilder.DEFAULT_HTTP_READ_TIMEOUT)); - // Verify the default timeouts are applied to the RestTemplate used by the ResourceRetriever - verifyResourceRetrieverRestTemplateTimeouts(context, - JWKSourceBuilder.DEFAULT_HTTP_CONNECT_TIMEOUT, - JWKSourceBuilder.DEFAULT_HTTP_READ_TIMEOUT); - }); + getResourceServerContextRunner() + .withUserConfiguration(RecordingClientHttpRequestFactoryBuilderConfiguration.class) + .run(context -> { + AadB2cProperties properties = context.getBean(AadB2cProperties.class); + assertThat(properties.getJwtConnectTimeout()) + .isEqualTo(Duration.ofMillis(JWKSourceBuilder.DEFAULT_HTTP_CONNECT_TIMEOUT)); + assertThat(properties.getJwtReadTimeout()) + .isEqualTo(Duration.ofMillis(JWKSourceBuilder.DEFAULT_HTTP_READ_TIMEOUT)); + // Verify the default timeouts are applied to the RestTemplate used by the ResourceRetriever + verifyResourceRetrieverRestTemplateTimeouts(context, + JWKSourceBuilder.DEFAULT_HTTP_CONNECT_TIMEOUT, + JWKSourceBuilder.DEFAULT_HTTP_READ_TIMEOUT); + }); } @Test void testB2CTimeoutCustomValues() { getResourceServerContextRunner() + .withUserConfiguration(RecordingClientHttpRequestFactoryBuilderConfiguration.class) .withPropertyValues( "spring.cloud.azure.active-directory.b2c.jwt-connect-timeout=2000", "spring.cloud.azure.active-directory.b2c.jwt-read-timeout=3000") @@ -353,15 +359,18 @@ private static void verifyResourceRetrieverRestTemplateTimeouts(ApplicationConte // RestTemplate -> ClientHttpRequestFactory org.springframework.http.client.ClientHttpRequestFactory requestFactory = ((org.springframework.web.client.RestTemplate) restOperations).getRequestFactory(); + assertThat(requestFactory).isNotNull(); + + assertRecordedHttpClientSettings(context, expectedConnectTimeoutMs, expectedReadTimeoutMs); + } - // Verify timeouts on the request factory (may be stored as Duration or int) - Object connectTimeoutValue = org.springframework.test.util.ReflectionTestUtils - .getField(requestFactory, "connectTimeout"); - Object readTimeoutValue = org.springframework.test.util.ReflectionTestUtils - .getField(requestFactory, "readTimeout"); - int connectTimeout = connectTimeoutValue instanceof java.time.Duration d ? (int) d.toMillis() : (int) connectTimeoutValue; - int readTimeout = readTimeoutValue instanceof java.time.Duration d ? (int) d.toMillis() : (int) readTimeoutValue; - assertThat(connectTimeout).isEqualTo(expectedConnectTimeoutMs); - assertThat(readTimeout).isEqualTo(expectedReadTimeoutMs); + private static void assertRecordedHttpClientSettings(ApplicationContext context, + int expectedConnectTimeoutMs, + int expectedReadTimeoutMs) { + HttpClientSettings clientSettings = context.getBean(RecordingClientHttpRequestFactoryBuilder.class) + .getClientSettings(); + assertThat(clientSettings).isNotNull(); + assertThat(clientSettings.connectTimeout()).isEqualTo(Duration.ofMillis(expectedConnectTimeoutMs)); + assertThat(clientSettings.readTimeout()).isEqualTo(Duration.ofMillis(expectedReadTimeoutMs)); } } From 5b99e51d308113b1d62179daba3372e0113b5c6e Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 2 Jun 2026 09:38:56 +0000 Subject: [PATCH 17/20] Update RecordingClientHttpRequestFactoryBuilderConfiguration.java --- .../RecordingClientHttpRequestFactoryBuilderConfiguration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java index 6eecc15c6f19..87815254df89 100644 --- a/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java +++ b/sdk/spring/spring-cloud-azure-autoconfigure/src/test/java/com/azure/spring/cloud/autoconfigure/implementation/aad/RecordingClientHttpRequestFactoryBuilderConfiguration.java @@ -37,4 +37,4 @@ public HttpClientSettings getClientSettings() { return this.clientSettings; } } -} \ No newline at end of file +} From fdfa73049d3117009777cf3fe3433ce08f3103aa Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 2 Jun 2026 11:48:31 +0000 Subject: [PATCH 18/20] Fixing tests issues --- .../FaultInjectionServerErrorRuleOnGatewayTests.java | 6 ++---- .../FaultInjectionServerErrorRuleOnGatewayV2Tests.java | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java index 584a0f02b6d1..eca994eaf13e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayTests.java @@ -615,10 +615,8 @@ private void validateRetryWithGatewayStatistics(JsonNode gatewayStatisticsList) } private Duration getMaxRetryWithTimeout() { - AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); - ConsistencyLevel consistencyLevel = asyncDocumentClient.getConsistencyLevel() != null - ? asyncDocumentClient.getConsistencyLevel() - : asyncDocumentClient.getDefaultConsistencyLevelOfAccount(); + ConsistencyLevel consistencyLevel = BridgeInternal.getContextClient(this.client) + .getDefaultConsistencyLevelOfAccount(); return consistencyLevel == ConsistencyLevel.STRONG ? Duration.ofSeconds(60) : Duration.ofSeconds(30); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java index 374727d338e5..4dc7fd769395 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionServerErrorRuleOnGatewayV2Tests.java @@ -771,10 +771,8 @@ private void validateRetryWithGatewayStatistics(JsonNode gatewayStatisticsList) } private Duration getMaxRetryWithTimeout() { - AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); - ConsistencyLevel consistencyLevel = asyncDocumentClient.getConsistencyLevel() != null - ? asyncDocumentClient.getConsistencyLevel() - : asyncDocumentClient.getDefaultConsistencyLevelOfAccount(); + ConsistencyLevel consistencyLevel = BridgeInternal.getContextClient(this.client) + .getDefaultConsistencyLevelOfAccount(); return consistencyLevel == ConsistencyLevel.STRONG ? Duration.ofSeconds(60) : Duration.ofSeconds(30); } From 070a5bfb13f37e925f76bada04e7898019c560f9 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 2 Jun 2026 19:39:38 +0000 Subject: [PATCH 19/20] Update QueryValidationTests.java --- .../azure/cosmos/rx/QueryValidationTests.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java index 31e380625577..0631e3b5d16d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java @@ -126,6 +126,7 @@ public void orderByQueryForLargeCollection() { ).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); + waitForCollectionToBeAvailableToRead(container); int partitionDocCount = 5; int pageSize = partitionDocCount + 1; @@ -381,6 +382,7 @@ public void splitQueryContinuationToken() throws Exception { CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, "/mypk"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); + waitForCollectionToBeAvailableToRead(container); AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); //Insert some documents @@ -492,6 +494,7 @@ public void orderbyContinuationOnUndefinedAndNull() throws Exception { createdDatabase.createContainer(containerProperties, new CosmosContainerRequestOptions()).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); + waitForCollectionToBeAvailableToRead(container); CosmosContainerResponse containerResponse = container.read().block(); assert (containerResponse != null); CosmosContainerProperties properties = containerResponse.getProperties(); @@ -579,6 +582,7 @@ public void queryLargePartitionKeyOn100BPKCollection() throws Exception { CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, "/id"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); + waitForCollectionToBeAvailableToRead(container); //id as partitionkey > 100bytes String itemID1 = "cosmosdb" + "-drWarm4Z60GkknMfHLo5BwuiH7w6AffzSb9jKbvwAQwaRZd10oxnLeCueuyZ5gbm9dwVVAqJLdzrB38Dk73Q6xMErv-0"; @@ -614,6 +618,20 @@ private List getPartitionKeyRanges( return partitionKeyRanges; } + private static void waitForCollectionToBeAvailableToRead(CosmosAsyncContainer container) { + // Creating a container is an async task - especially with multiple regions it can + // take some time until the container is available in the remote regions as well. + // When the container does not exist yet, metadata reads or item operations can + // fail with 404/1013 "Collection is not yet available for read". + // So, adding this delay after container creation to minimize risk of hitting these errors. + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + private List queryAndGetResults(SqlQuerySpec querySpec, CosmosQueryRequestOptions options, Class type) { CosmosPagedFlux queryPagedFlux = createdContainer.queryItems(querySpec, options, type); AtomicReference> value = new AtomicReference<>(); From ccabf826d82b69b8574aa2db7456fe0cc81bf200 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 2 Jun 2026 21:59:25 +0000 Subject: [PATCH 20/20] Fixing flaky query tests due to container creation being async --- ...StreamingOrderByQueryVectorSearchTest.java | 3 +++ .../cosmos/rx/OrderbyDocumentQueryTest.java | 1 + .../azure/cosmos/rx/QueryValidationTests.java | 22 ++++--------------- .../cosmos/rx/ReadFeedCollectionsTest.java | 1 + .../com/azure/cosmos/rx/TestSuiteBase.java | 20 ++++++++++++----- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java index 5895088fd528..c632fd06e315 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/NonStreamingOrderByQueryVectorSearchTest.java @@ -45,6 +45,7 @@ import static com.azure.cosmos.rx.TestSuiteBase.createDatabase; import static com.azure.cosmos.rx.TestSuiteBase.safeClose; import static com.azure.cosmos.rx.TestSuiteBase.safeDeleteDatabase; +import static com.azure.cosmos.rx.TestSuiteBase.waitForCollectionToBeAvailableToRead; import static org.assertj.core.api.Assertions.assertThat; import com.azure.cosmos.SuperFlakyTestRetryAnalyzer; @@ -101,6 +102,8 @@ public void before_NonStreamingOrderByQueryVectorSearchTest() { database.createContainer(containerProperties).block(); largeDataContainer = database.getContainer(largeDataContainerId); + waitForCollectionToBeAvailableToRead(); + for (Document doc : getVectorDocs()) { flatIndexContainer.createItem(doc).block(); quantizedIndexContainer.createItem(doc).block(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java index 1b8c48f8d845..16722fd466bd 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/OrderbyDocumentQueryTest.java @@ -688,6 +688,7 @@ public void before_OrderbyDocumentQueryTest() throws Exception { })) .block(); roundTripsContainer = createdDatabase.getContainer(containerName); + waitForCollectionToBeAvailableToRead(); setupRoundTripContainer(); List> keyValuePropsList = new ArrayList<>(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java index 0631e3b5d16d..966890825de4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/QueryValidationTests.java @@ -126,7 +126,7 @@ public void orderByQueryForLargeCollection() { ).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); - waitForCollectionToBeAvailableToRead(container); + waitForCollectionToBeAvailableToRead(); int partitionDocCount = 5; int pageSize = partitionDocCount + 1; @@ -382,7 +382,7 @@ public void splitQueryContinuationToken() throws Exception { CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, "/mypk"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); - waitForCollectionToBeAvailableToRead(container); + waitForCollectionToBeAvailableToRead(); AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); //Insert some documents @@ -494,7 +494,7 @@ public void orderbyContinuationOnUndefinedAndNull() throws Exception { createdDatabase.createContainer(containerProperties, new CosmosContainerRequestOptions()).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerProperties.getId()); - waitForCollectionToBeAvailableToRead(container); + waitForCollectionToBeAvailableToRead(); CosmosContainerResponse containerResponse = container.read().block(); assert (containerResponse != null); CosmosContainerProperties properties = containerResponse.getProperties(); @@ -582,7 +582,7 @@ public void queryLargePartitionKeyOn100BPKCollection() throws Exception { CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, "/id"); CosmosContainerResponse containerResponse = createdDatabase.createContainer(containerProperties).block(); CosmosAsyncContainer container = createdDatabase.getContainer(containerId); - waitForCollectionToBeAvailableToRead(container); + waitForCollectionToBeAvailableToRead(); //id as partitionkey > 100bytes String itemID1 = "cosmosdb" + "-drWarm4Z60GkknMfHLo5BwuiH7w6AffzSb9jKbvwAQwaRZd10oxnLeCueuyZ5gbm9dwVVAqJLdzrB38Dk73Q6xMErv-0"; @@ -618,20 +618,6 @@ private List getPartitionKeyRanges( return partitionKeyRanges; } - private static void waitForCollectionToBeAvailableToRead(CosmosAsyncContainer container) { - // Creating a container is an async task - especially with multiple regions it can - // take some time until the container is available in the remote regions as well. - // When the container does not exist yet, metadata reads or item operations can - // fail with 404/1013 "Collection is not yet available for read". - // So, adding this delay after container creation to minimize risk of hitting these errors. - try { - TimeUnit.SECONDS.sleep(3); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); - } - } - private List queryAndGetResults(SqlQuerySpec querySpec, CosmosQueryRequestOptions options, Class type) { CosmosPagedFlux queryPagedFlux = createdContainer.queryItems(querySpec, options, type); AtomicReference> value = new AtomicReference<>(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java index ad3c980c6f93..e081fee801b4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ReadFeedCollectionsTest.java @@ -88,6 +88,7 @@ public CosmosAsyncContainer createCollections(CosmosAsyncDatabase database) { partitionKeyDef.setPaths(paths); CosmosContainerProperties containerProperties = new CosmosContainerProperties(UUID.randomUUID().toString(), partitionKeyDef); database.createContainer(containerProperties, new CosmosContainerRequestOptions()).block(); + waitForCollectionToBeAvailableToRead(); return database.getContainer(containerProperties.getId()); } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index 2b213a5c5683..0e84dde4c2cf 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -566,16 +566,26 @@ public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database .getCosmosAsyncClientAccessor() .getPreferredRegions(client).size() > 1; if (throughput > 6000 || isMultiRegional) { - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + waitForCollectionToBeAvailableToRead(); } return database.getContainer(cosmosContainerProperties.getId()); } + protected static void waitForCollectionToBeAvailableToRead() { + // Creating a container is an async task - especially with multiple regions it can + // take some time until the container is available in the remote regions as well. + // When the container does not exist yet, metadata reads or item operations can + // fail with 404/1013 "Collection is not yet available for read". + // So, adding this delay after container creation to minimize risk of hitting these errors. + try { + TimeUnit.SECONDS.sleep(3); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database, CosmosContainerProperties cosmosContainerProperties, CosmosContainerRequestOptions options) { database.createContainer(cosmosContainerProperties, options)