From 40b746a6ab7cec3786f4cbc995c7d3dcfe468d2e Mon Sep 17 00:00:00 2001 From: guoqiang Date: Sat, 28 Feb 2026 12:49:25 +0800 Subject: [PATCH 1/2] fix [fix](Azure)Fix Azure endpoint detection for sovereign clouds with force-global config support --- .../java/org/apache/doris/common/Config.java | 23 +++++ .../property/storage/AzureProperties.java | 29 +++++-- .../property/storage/AzurePropertyUtils.java | 66 +++++++++++++- .../property/storage/AzurePropertiesTest.java | 85 +++++++++++++++++++ .../storage/AzurePropertyUtilsTest.java | 81 +++++++++++++++++- 5 files changed, 269 insertions(+), 15 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 7a0ce25a8882b6..00ab259a559065 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -3517,6 +3517,29 @@ public static int metaServiceRpcRetryTimes() { }) public static boolean force_azure_blob_global_endpoint = false; + @ConfField(mutable = true, description = { + "指定 Azure endpoint 域名后缀白名单(包含 blob 与 dfs),多个值使用逗号分隔。" + + "默认值为 .blob.core.windows.net,.dfs.core.windows.net," + + ".blob.core.chinacloudapi.cn,.dfs.core.chinacloudapi.cn," + + ".blob.core.usgovcloudapi.net,.dfs.core.usgovcloudapi.net," + + ".blob.core.cloudapi.de,.dfs.core.cloudapi.de。", + "The host suffix whitelist for Azure endpoints (both blob and dfs), separated by commas. " + + "The default value is .blob.core.windows.net,.dfs.core.windows.net," + + ".blob.core.chinacloudapi.cn,.dfs.core.chinacloudapi.cn," + + ".blob.core.usgovcloudapi.net,.dfs.core.usgovcloudapi.net," + + ".blob.core.cloudapi.de,.dfs.core.cloudapi.de." + }) + public static String[] azure_blob_host_suffixes = { + ".blob.core.windows.net", + ".dfs.core.windows.net", + ".blob.core.chinacloudapi.cn", + ".dfs.core.chinacloudapi.cn", + ".blob.core.usgovcloudapi.net", + ".dfs.core.usgovcloudapi.net", + ".blob.core.cloudapi.de", + ".dfs.core.cloudapi.de" + }; + @ConfField(mutable = true, description = {"指定 Jdbc driver url 白名单,举例:jdbc_driver_url_white_list=a,b,c", "the white list for jdbc driver url, if it is empty, no white list will be set" + "for example: jdbc_driver_url_white_list=a,b,c" diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java index b4848aa61b6d39..32b017cddc8f24 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java @@ -30,6 +30,8 @@ import org.apache.hadoop.conf.Configuration; import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -135,8 +137,6 @@ public AzureProperties(Map origProps) { super(Type.AZURE, origProps); } - private static final String AZURE_ENDPOINT_SUFFIX = ".blob.core.windows.net"; - @Override public void initNormalizeAndCheckProps() { super.initNormalizeAndCheckProps(); @@ -160,7 +160,7 @@ public static boolean guessIsMe(Map origProps) { .findFirst() .orElse(null); if (!Strings.isNullOrEmpty(value)) { - return value.endsWith(AZURE_ENDPOINT_SUFFIX); + return AzurePropertyUtils.isAzureBlobEndpoint(value); } return false; } @@ -243,13 +243,24 @@ protected Set schemas() { } private static void setHDFSAzureAccountKeys(Configuration conf, String accountName, String accountKey) { - String[] endpoints = { - "dfs.core.windows.net", - "blob.core.windows.net" - }; + Set endpoints = new LinkedHashSet<>(); + if (Config.azure_blob_host_suffixes != null) { + for (String endpointSuffix : Config.azure_blob_host_suffixes) { + if (Strings.isNullOrEmpty(endpointSuffix)) { + continue; + } + String normalizedEndpoint = endpointSuffix.trim().toLowerCase(Locale.ROOT); + if (normalizedEndpoint.startsWith(".")) { + normalizedEndpoint = normalizedEndpoint.substring(1); + } + if (!normalizedEndpoint.isEmpty()) { + endpoints.add(normalizedEndpoint); + } + } + } for (String endpoint : endpoints) { - String key = String.format("fs.azure.account.key.%s.%s", accountName, endpoint); - conf.set(key, accountKey); + String accountKeyConfig = String.format("fs.azure.account.key.%s.%s", accountName, endpoint); + conf.set(accountKeyConfig, accountKey); } conf.set("fs.azure.account.key", accountKey); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java index 9015258070130d..3921ad29f49fa3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.property.storage; +import org.apache.doris.common.Config; import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException; @@ -24,10 +25,12 @@ import java.net.URI; import java.net.URISyntaxException; +import java.util.Locale; import java.util.Map; import java.util.regex.Pattern; public class AzurePropertyUtils { + private static final String AZURE_GLOBAL_BLOB_HOST_SUFFIX = ".blob.core.windows.net"; /** * Validates and normalizes an Azure Blob Storage URI into a unified {@code s3://}-style format. @@ -76,6 +79,64 @@ public static String validateAndNormalizeUri(String path) throws UserException { private static final Pattern ONELAKE_PATTERN = Pattern.compile( "abfs[s]?://([^@]+)@([^/]+)\\.dfs\\.fabric\\.microsoft\\.com(/.*)?", Pattern.CASE_INSENSITIVE); + public static boolean isAzureBlobEndpoint(String endpointOrHost) { + String host = extractHost(endpointOrHost); + if (StringUtils.isBlank(host)) { + return false; + } + String normalizedHost = host.toLowerCase(Locale.ROOT); + if (Config.force_azure_blob_global_endpoint) { + return normalizedHost.endsWith(AZURE_GLOBAL_BLOB_HOST_SUFFIX); + } + return matchesAnySuffix(normalizedHost, Config.azure_blob_host_suffixes); + } + + private static boolean matchesAnySuffix(String normalizedHost, String[] suffixes) { + if (suffixes == null || suffixes.length == 0) { + return false; + } + for (String suffix : suffixes) { + if (matchesSuffix(normalizedHost, suffix)) { + return true; + } + } + return false; + } + + private static boolean matchesSuffix(String normalizedHost, String suffix) { + if (StringUtils.isBlank(suffix)) { + return false; + } + String normalizedSuffix = suffix.trim().toLowerCase(Locale.ROOT); + if (!normalizedSuffix.startsWith(".")) { + normalizedSuffix = "." + normalizedSuffix; + } + return normalizedHost.endsWith(normalizedSuffix); + } + + private static String extractHost(String endpointOrHost) { + if (StringUtils.isBlank(endpointOrHost)) { + return null; + } + String normalized = endpointOrHost.trim(); + if (normalized.contains("://")) { + try { + return new URI(normalized).getHost(); + } catch (URISyntaxException e) { + return null; + } + } + int slashIndex = normalized.indexOf('/'); + if (slashIndex >= 0) { + normalized = normalized.substring(0, slashIndex); + } + int colonIndex = normalized.indexOf(':'); + if (colonIndex >= 0) { + normalized = normalized.substring(0, colonIndex); + } + return normalized; + } + /** * Converts an Azure Blob Storage URI into a unified {@code s3:///} format. @@ -137,8 +198,9 @@ private static String convertToS3Style(String uri) { throw new StoragePropertiesException("Invalid Azure HTTPS URI, missing host: " + uri); } - // Typical Azure Blob domain: .blob.core.windows.net - if (!host.contains(".blob.core.windows.net")) { + // When force_azure_blob_global_endpoint is enabled, only global Azure Blob host is accepted. + // Otherwise we allow custom domains because Azure can be accessed through endpoint aliases. + if (Config.force_azure_blob_global_endpoint && !isAzureBlobEndpoint(host)) { throw new StoragePropertiesException("Not an Azure Blob URL: " + uri); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java index 8171f33db409ef..dbfcbaaabe8bfa 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.property.storage; +import org.apache.doris.common.Config; import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException; @@ -213,4 +214,88 @@ public void testOneLake() throws UserException { Assertions.assertEquals("https://login.microsoftonline.com/72f988bf-5289-5289-5289-2d7cd011db47/oauth2/token", hadoopStorageConfig.get("fs.azure.account.oauth2.client.endpoint.onelake.dfs.fabric.microsoft.com")); } + + @Test + public void testGuessIsMeChinaEndpoint() { + boolean originalConfig = Config.force_azure_blob_global_endpoint; + try { + Config.force_azure_blob_global_endpoint = false; + origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); + origProps.put("s3.endpoint", "mystorageaccount.blob.core.chinacloudapi.cn"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); + } finally { + Config.force_azure_blob_global_endpoint = originalConfig; + } + } + + @Test + public void testGuessIsMeChinaEndpointWhenForceGlobalEndpoint() { + boolean originalConfig = Config.force_azure_blob_global_endpoint; + try { + Config.force_azure_blob_global_endpoint = true; + origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); + Assertions.assertFalse(AzureProperties.guessIsMe(origProps)); + origProps.put("provider", "azure"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); + } finally { + Config.force_azure_blob_global_endpoint = originalConfig; + } + } + + @Test + public void testFormatAzureEndpointHonorsForceGlobalEndpointConfig() { + boolean originalConfig = Config.force_azure_blob_global_endpoint; + try { + Config.force_azure_blob_global_endpoint = false; + Assertions.assertEquals("https://mystorageaccount.blob.core.chinacloudapi.cn", + AzureProperties.formatAzureEndpoint("mystorageaccount.blob.core.chinacloudapi.cn", + "mystorageaccount")); + + Config.force_azure_blob_global_endpoint = true; + Assertions.assertEquals("https://mystorageaccount.blob.core.windows.net", + AzureProperties.formatAzureEndpoint("mystorageaccount.blob.core.chinacloudapi.cn", + "mystorageaccount")); + } finally { + Config.force_azure_blob_global_endpoint = originalConfig; + } + } + + @Test + public void testHadoopStorageConfigContainsChinaCloudAccountKeys() throws UserException { + origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); + origProps.put("s3.access_key", "myAzureAccessKey"); + origProps.put("s3.secret_key", "myAzureSecretKey"); + origProps.put("provider", "azure"); + + AzureProperties azureProperties = (AzureProperties) StorageProperties.createPrimary(origProps); + Configuration hadoopStorageConfig = azureProperties.getHadoopStorageConfig(); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.blob.core.chinacloudapi.cn")); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.dfs.core.chinacloudapi.cn")); + } + + @Test + public void testHadoopStorageConfigContainsCustomAccountKeyEndpointsFromConfig() throws UserException { + String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; + try { + Config.azure_blob_host_suffixes = new String[] {"blob.custom.test", ".dfs.custom.test", " "}; + origProps.put("s3.endpoint", "https://mystorageaccount.blob.custom.test"); + origProps.put("s3.access_key", "myAzureAccessKey"); + origProps.put("s3.secret_key", "myAzureSecretKey"); + origProps.put("provider", "azure"); + + AzureProperties azureProperties = (AzureProperties) StorageProperties.createPrimary(origProps); + Configuration hadoopStorageConfig = azureProperties.getHadoopStorageConfig(); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.blob.custom.test")); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.dfs.custom.test")); + Assertions.assertNull( + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.blob.core.windows.net")); + } finally { + Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java index cbc584d9a10b0f..67fa4de6734843 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java @@ -17,6 +17,8 @@ package org.apache.doris.datasource.property.storage; +import org.apache.doris.common.Config; +import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException; import org.junit.jupiter.api.Assertions; @@ -87,6 +89,19 @@ public void testHttpsUriWithPath() throws Exception { Assertions.assertEquals(expected, AzurePropertyUtils.validateAndNormalizeUri(input)); } + @Test + public void testHttpsChinaUriWithPath() throws Exception { + boolean originalConfig = Config.force_azure_blob_global_endpoint; + try { + Config.force_azure_blob_global_endpoint = false; + String input = "https://account.blob.core.chinacloudapi.cn/container/data/file.parquet"; + String expected = "s3://container/data/file.parquet"; + Assertions.assertEquals(expected, AzurePropertyUtils.validateAndNormalizeUri(input)); + } finally { + Config.force_azure_blob_global_endpoint = originalConfig; + } + } + @Test public void testInvalidAzureScheme() { String input = "ftp://container@account.blob.core.windows.net/data/file.txt"; @@ -109,10 +124,21 @@ public void testHttpsUriMissingHost() { } @Test - public void testHttpsUriNotAzureBlob() { - String input = "https://account.otherdomain.com/container/file.txt"; - Assertions.assertThrows(StoragePropertiesException.class, () -> - AzurePropertyUtils.validateAndNormalizeUri(input)); + public void testHttpsUriNotAzureBlob() throws UserException { + boolean originalConfig = Config.force_azure_blob_global_endpoint; + try { + String input = "https://account.otherdomain.com/container/file.txt"; + + Config.force_azure_blob_global_endpoint = false; + Assertions.assertEquals("s3://container/file.txt", + AzurePropertyUtils.validateAndNormalizeUri(input)); + + Config.force_azure_blob_global_endpoint = true; + Assertions.assertThrows(StoragePropertiesException.class, () -> + AzurePropertyUtils.validateAndNormalizeUri(input)); + } finally { + Config.force_azure_blob_global_endpoint = originalConfig; + } } @Test @@ -121,6 +147,53 @@ public void testBlankUri() { AzurePropertyUtils.validateAndNormalizeUri(" ")); } + @Test + public void testIsAzureBlobEndpoint() { + boolean originalConfig = Config.force_azure_blob_global_endpoint; + String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; + try { + Config.force_azure_blob_global_endpoint = false; + Config.azure_blob_host_suffixes = new String[] { + ".blob.core.windows.net", + ".dfs.core.windows.net", + ".blob.core.chinacloudapi.cn", + ".dfs.core.chinacloudapi.cn" + }; + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("https://account.blob.core.windows.net")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.core.chinacloudapi.cn")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.core.chinacloudapi.cn")); + Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("https://account.otherdomain.com")); + + Config.force_azure_blob_global_endpoint = true; + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("https://account.blob.core.windows.net")); + Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.core.chinacloudapi.cn")); + Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.core.chinacloudapi.cn")); + } finally { + Config.force_azure_blob_global_endpoint = originalConfig; + Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; + } + } + + @Test + public void testIsAzureBlobEndpointFromConfig() { + boolean originalForceAzureBlobGlobalEndpoint = Config.force_azure_blob_global_endpoint; + String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; + try { + Config.force_azure_blob_global_endpoint = false; + Config.azure_blob_host_suffixes = new String[] {"blob.custom.test", "dfs.custom.test", " .blob.extra.test "}; + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("https://account.blob.custom.test")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.custom.test")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.extra.test")); + Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.unknown.test")); + + Config.force_azure_blob_global_endpoint = true; + Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.custom.test")); + } finally { + Config.force_azure_blob_global_endpoint = originalForceAzureBlobGlobalEndpoint; + Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; + } + } + // ---------- validateAndGetUri Tests ---------- @Test From 3352e6a8228d417d570fc92e7bf26617f522b553 Mon Sep 17 00:00:00 2001 From: guoqiang Date: Mon, 2 Mar 2026 14:41:20 +0800 Subject: [PATCH 2/2] fix --- be/src/common/config.cpp | 2 - be/src/common/config.h | 2 - be/src/io/fs/azure_obj_storage_client.cpp | 3 - be/src/util/s3_util.cpp | 11 +--- cloud/src/common/config.h | 2 - cloud/src/recycler/s3_accessor.cpp | 10 +-- .../java/org/apache/doris/common/Config.java | 9 --- .../property/storage/AzureProperties.java | 10 ++- .../property/storage/AzurePropertyUtils.java | 11 ---- .../property/storage/AzurePropertiesTest.java | 65 +++++++++---------- .../storage/AzurePropertyUtilsTest.java | 43 ++---------- 11 files changed, 48 insertions(+), 120 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 60c793197a271b..b7ecbbb7d45a32 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1466,8 +1466,6 @@ DEFINE_Int64(wait_cancel_release_memory_ms, "5000"); DEFINE_mBool(check_segment_when_build_rowset_meta, "false"); -DEFINE_mBool(force_azure_blob_global_endpoint, "false"); - DEFINE_mInt32(max_s3_client_retry, "10"); DEFINE_mInt32(s3_read_base_wait_time_ms, "100"); DEFINE_mInt32(s3_read_max_wait_time_ms, "800"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 1d3e173a918b9b..3e200f721635db 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1560,8 +1560,6 @@ DECLARE_mBool(check_segment_when_build_rowset_meta); DECLARE_Int32(num_query_ctx_map_partitions); -DECLARE_mBool(force_azure_blob_global_endpoint); - DECLARE_mBool(enable_s3_rate_limiter); DECLARE_mInt64(s3_get_bucket_tokens); DECLARE_mInt64(s3_get_token_per_second); diff --git a/be/src/io/fs/azure_obj_storage_client.cpp b/be/src/io/fs/azure_obj_storage_client.cpp index 026d5b56b9344f..7dd44db76d8ff7 100644 --- a/be/src/io/fs/azure_obj_storage_client.cpp +++ b/be/src/io/fs/azure_obj_storage_client.cpp @@ -418,9 +418,6 @@ std::string AzureObjStorageClient::generate_presigned_url(const ObjectStoragePat Azure::Storage::StorageSharedKeyCredential(conf.ak, conf.sk)); std::string endpoint = conf.endpoint; - if (doris::config::force_azure_blob_global_endpoint) { - endpoint = fmt::format("https://{}.blob.core.windows.net", conf.ak); - } auto sasURL = fmt::format(SAS_TOKEN_URL_TEMPLATE, endpoint, conf.bucket, opts.key, sasToken); if (sasURL.find("://") == std::string::npos) { sasURL = "https://" + sasURL; diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp index ce24ab30e60c80..f0c3aa1503a382 100644 --- a/be/src/util/s3_util.cpp +++ b/be/src/util/s3_util.cpp @@ -313,14 +313,9 @@ std::shared_ptr S3ClientFactory::_create_azure_client( std::make_shared(s3_conf.ak, s3_conf.sk); const std::string container_name = s3_conf.bucket; - std::string uri; - if (config::force_azure_blob_global_endpoint) { - uri = fmt::format("https://{}.blob.core.windows.net/{}", s3_conf.ak, container_name); - } else { - uri = fmt::format("{}/{}", s3_conf.endpoint, container_name); - if (s3_conf.endpoint.find("://") == std::string::npos) { - uri = "https://" + uri; - } + std::string uri = fmt::format("{}/{}", s3_conf.endpoint, container_name); + if (s3_conf.endpoint.find("://") == std::string::npos) { + uri = "https://" + uri; } Azure::Storage::Blobs::BlobClientOptions options; diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index f6aa67a496fd45..37030aba77fec0 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -293,8 +293,6 @@ CONF_Validator(s3_client_http_scheme, [](const std::string& config) -> bool { return config == "http" || config == "https"; }); -CONF_Bool(force_azure_blob_global_endpoint, "false"); - // Max retry times for object storage request CONF_mInt64(max_s3_client_retry, "10"); diff --git a/cloud/src/recycler/s3_accessor.cpp b/cloud/src/recycler/s3_accessor.cpp index 1792a81d435f05..0f2a7776fcc7fe 100644 --- a/cloud/src/recycler/s3_accessor.cpp +++ b/cloud/src/recycler/s3_accessor.cpp @@ -372,13 +372,9 @@ int S3Accessor::init() { options.Retry.MaxRetries = config::max_s3_client_retry; auto cred = std::make_shared(conf_.ak, conf_.sk); - if (config::force_azure_blob_global_endpoint) { - uri_ = fmt::format("https://{}.blob.core.windows.net/{}", conf_.ak, conf_.bucket); - } else { - uri_ = fmt::format("{}/{}", conf_.endpoint, conf_.bucket); - if (uri_.find("://") == std::string::npos) { - uri_ = "https://" + uri_; - } + uri_ = fmt::format("{}/{}", conf_.endpoint, conf_.bucket); + if (uri_.find("://") == std::string::npos) { + uri_ = "https://" + uri_; } uri_ = normalize_http_uri(uri_); // In Azure's HTTP requests, all policies in the vector are called in a chained manner following the HTTP pipeline approach. diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 00ab259a559065..9ced6a1ac2bf26 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -3508,15 +3508,6 @@ public static int metaServiceRpcRetryTimes() { + "for example: s3_load_endpoint_white_list=a,b,c"}) public static String[] s3_load_endpoint_white_list = {}; - @ConfField(mutable = true, description = { - "此参数控制是否强制使用 Azure global endpoint。默认值为 false,系统将使用用户指定的 endpoint。" - + "如果设置为 true,系统将强制使用 {account}.blob.core.windows.net。", - "This parameter controls whether to force the use of the Azure global endpoint. " - + "The default is false, meaning the system will use the user-specified endpoint. " - + "If set to true, the system will force the use of {account}.blob.core.windows.net." - }) - public static boolean force_azure_blob_global_endpoint = false; - @ConfField(mutable = true, description = { "指定 Azure endpoint 域名后缀白名单(包含 blob 与 dfs),多个值使用逗号分隔。" + "默认值为 .blob.core.windows.net,.dfs.core.windows.net," diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java index 32b017cddc8f24..a8d6280d3771cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java @@ -66,6 +66,7 @@ public class AzureProperties extends StorageProperties { @Getter @ConnectorProperty(names = {"azure.endpoint", "s3.endpoint", "AWS_ENDPOINT", "endpoint", "ENDPOINT"}, + required = false, description = "The endpoint of S3.") protected String endpoint = ""; @@ -191,9 +192,12 @@ public Map getBackendConfigProperties() { public static final String AZURE_ENDPOINT_TEMPLATE = "https://%s.blob.core.windows.net"; - public static String formatAzureEndpoint(String endpoint, String accessKey) { - if (Config.force_azure_blob_global_endpoint) { - return String.format(AZURE_ENDPOINT_TEMPLATE, accessKey); + public static String formatAzureEndpoint(String endpoint, String accountName) { + if (Strings.isNullOrEmpty(endpoint)) { + if (Strings.isNullOrEmpty(accountName)) { + return ""; + } + return String.format(AZURE_ENDPOINT_TEMPLATE, accountName); } if (endpoint.contains("://")) { return endpoint; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java index 3921ad29f49fa3..e2d04057f65bbf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java @@ -30,8 +30,6 @@ import java.util.regex.Pattern; public class AzurePropertyUtils { - private static final String AZURE_GLOBAL_BLOB_HOST_SUFFIX = ".blob.core.windows.net"; - /** * Validates and normalizes an Azure Blob Storage URI into a unified {@code s3://}-style format. *

@@ -85,9 +83,6 @@ public static boolean isAzureBlobEndpoint(String endpointOrHost) { return false; } String normalizedHost = host.toLowerCase(Locale.ROOT); - if (Config.force_azure_blob_global_endpoint) { - return normalizedHost.endsWith(AZURE_GLOBAL_BLOB_HOST_SUFFIX); - } return matchesAnySuffix(normalizedHost, Config.azure_blob_host_suffixes); } @@ -198,12 +193,6 @@ private static String convertToS3Style(String uri) { throw new StoragePropertiesException("Invalid Azure HTTPS URI, missing host: " + uri); } - // When force_azure_blob_global_endpoint is enabled, only global Azure Blob host is accepted. - // Otherwise we allow custom domains because Azure can be accessed through endpoint aliases. - if (Config.force_azure_blob_global_endpoint && !isAzureBlobEndpoint(host)) { - throw new StoragePropertiesException("Not an Azure Blob URL: " + uri); - } - // Path usually looks like: // String[] parts = path.split("/", 3); if (parts.length < 2) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java index dbfcbaaabe8bfa..073d9f361b787d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java @@ -217,48 +217,41 @@ public void testOneLake() throws UserException { @Test public void testGuessIsMeChinaEndpoint() { - boolean originalConfig = Config.force_azure_blob_global_endpoint; - try { - Config.force_azure_blob_global_endpoint = false; - origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); - Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); - origProps.put("s3.endpoint", "mystorageaccount.blob.core.chinacloudapi.cn"); - Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); - } finally { - Config.force_azure_blob_global_endpoint = originalConfig; - } + origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); + origProps.put("s3.endpoint", "mystorageaccount.blob.core.chinacloudapi.cn"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); } @Test - public void testGuessIsMeChinaEndpointWhenForceGlobalEndpoint() { - boolean originalConfig = Config.force_azure_blob_global_endpoint; - try { - Config.force_azure_blob_global_endpoint = true; - origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); - Assertions.assertFalse(AzureProperties.guessIsMe(origProps)); - origProps.put("provider", "azure"); - Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); - } finally { - Config.force_azure_blob_global_endpoint = originalConfig; - } + public void testGuessIsMeByProviderWhenEndpointIsUnknown() { + origProps.put("s3.endpoint", "https://mystorageaccount.invalid.test"); + Assertions.assertFalse(AzureProperties.guessIsMe(origProps)); + origProps.put("provider", "azure"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); } @Test - public void testFormatAzureEndpointHonorsForceGlobalEndpointConfig() { - boolean originalConfig = Config.force_azure_blob_global_endpoint; - try { - Config.force_azure_blob_global_endpoint = false; - Assertions.assertEquals("https://mystorageaccount.blob.core.chinacloudapi.cn", - AzureProperties.formatAzureEndpoint("mystorageaccount.blob.core.chinacloudapi.cn", - "mystorageaccount")); - - Config.force_azure_blob_global_endpoint = true; - Assertions.assertEquals("https://mystorageaccount.blob.core.windows.net", - AzureProperties.formatAzureEndpoint("mystorageaccount.blob.core.chinacloudapi.cn", - "mystorageaccount")); - } finally { - Config.force_azure_blob_global_endpoint = originalConfig; - } + public void testFormatAzureEndpointUsesInputOrDefault() { + Assertions.assertEquals("https://mystorageaccount.blob.core.chinacloudapi.cn", + AzureProperties.formatAzureEndpoint("mystorageaccount.blob.core.chinacloudapi.cn", + "mystorageaccount")); + Assertions.assertEquals("https://mystorageaccount.blob.core.chinacloudapi.cn", + AzureProperties.formatAzureEndpoint("https://mystorageaccount.blob.core.chinacloudapi.cn", + "mystorageaccount")); + Assertions.assertEquals("https://mystorageaccount.blob.core.windows.net", + AzureProperties.formatAzureEndpoint("", "mystorageaccount")); + Assertions.assertEquals("", AzureProperties.formatAzureEndpoint("", "")); + } + + @Test + public void testDefaultEndpointWhenEndpointNotSet() throws UserException { + origProps.put("s3.access_key", "myAzureAccessKey"); + origProps.put("s3.secret_key", "myAzureSecretKey"); + origProps.put("provider", "azure"); + + AzureProperties azureProperties = (AzureProperties) StorageProperties.createPrimary(origProps); + Assertions.assertEquals("https://myAzureAccessKey.blob.core.windows.net", azureProperties.getEndpoint()); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java index 67fa4de6734843..1965284c3d1ba7 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java @@ -91,15 +91,9 @@ public void testHttpsUriWithPath() throws Exception { @Test public void testHttpsChinaUriWithPath() throws Exception { - boolean originalConfig = Config.force_azure_blob_global_endpoint; - try { - Config.force_azure_blob_global_endpoint = false; - String input = "https://account.blob.core.chinacloudapi.cn/container/data/file.parquet"; - String expected = "s3://container/data/file.parquet"; - Assertions.assertEquals(expected, AzurePropertyUtils.validateAndNormalizeUri(input)); - } finally { - Config.force_azure_blob_global_endpoint = originalConfig; - } + String input = "https://account.blob.core.chinacloudapi.cn/container/data/file.parquet"; + String expected = "s3://container/data/file.parquet"; + Assertions.assertEquals(expected, AzurePropertyUtils.validateAndNormalizeUri(input)); } @Test @@ -125,20 +119,9 @@ public void testHttpsUriMissingHost() { @Test public void testHttpsUriNotAzureBlob() throws UserException { - boolean originalConfig = Config.force_azure_blob_global_endpoint; - try { - String input = "https://account.otherdomain.com/container/file.txt"; - - Config.force_azure_blob_global_endpoint = false; - Assertions.assertEquals("s3://container/file.txt", - AzurePropertyUtils.validateAndNormalizeUri(input)); - - Config.force_azure_blob_global_endpoint = true; - Assertions.assertThrows(StoragePropertiesException.class, () -> - AzurePropertyUtils.validateAndNormalizeUri(input)); - } finally { - Config.force_azure_blob_global_endpoint = originalConfig; - } + String input = "https://account.otherdomain.com/container/file.txt"; + Assertions.assertEquals("s3://container/file.txt", + AzurePropertyUtils.validateAndNormalizeUri(input)); } @Test @@ -149,10 +132,8 @@ public void testBlankUri() { @Test public void testIsAzureBlobEndpoint() { - boolean originalConfig = Config.force_azure_blob_global_endpoint; String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; try { - Config.force_azure_blob_global_endpoint = false; Config.azure_blob_host_suffixes = new String[] { ".blob.core.windows.net", ".dfs.core.windows.net", @@ -163,33 +144,21 @@ public void testIsAzureBlobEndpoint() { Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.core.chinacloudapi.cn")); Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.core.chinacloudapi.cn")); Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("https://account.otherdomain.com")); - - Config.force_azure_blob_global_endpoint = true; - Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("https://account.blob.core.windows.net")); - Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.core.chinacloudapi.cn")); - Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.core.chinacloudapi.cn")); } finally { - Config.force_azure_blob_global_endpoint = originalConfig; Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; } } @Test public void testIsAzureBlobEndpointFromConfig() { - boolean originalForceAzureBlobGlobalEndpoint = Config.force_azure_blob_global_endpoint; String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; try { - Config.force_azure_blob_global_endpoint = false; Config.azure_blob_host_suffixes = new String[] {"blob.custom.test", "dfs.custom.test", " .blob.extra.test "}; Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("https://account.blob.custom.test")); Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.custom.test")); Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.extra.test")); Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.unknown.test")); - - Config.force_azure_blob_global_endpoint = true; - Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.custom.test")); } finally { - Config.force_azure_blob_global_endpoint = originalForceAzureBlobGlobalEndpoint; Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; } }