diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 36d465d2c77af5..ac4359331d4535 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1470,8 +1470,6 @@ DEFINE_Int64(wait_cancel_release_memory_ms, "5000"); DEFINE_mBool(check_segment_when_build_rowset_meta, "false"); -DEFINE_mBool(force_azure_blob_global_endpoint, "false"); - DEFINE_mInt32(max_s3_client_retry, "10"); DEFINE_mInt32(s3_read_base_wait_time_ms, "100"); DEFINE_mInt32(s3_read_max_wait_time_ms, "800"); diff --git a/be/src/common/config.h b/be/src/common/config.h index c1df9a11acbca8..30e546f93bc950 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1563,8 +1563,6 @@ DECLARE_mBool(check_segment_when_build_rowset_meta); DECLARE_Int32(num_query_ctx_map_partitions); -DECLARE_mBool(force_azure_blob_global_endpoint); - DECLARE_mBool(enable_s3_rate_limiter); DECLARE_mInt64(s3_get_bucket_tokens); DECLARE_mInt64(s3_get_token_per_second); diff --git a/be/src/io/fs/azure_obj_storage_client.cpp b/be/src/io/fs/azure_obj_storage_client.cpp index 026d5b56b9344f..7dd44db76d8ff7 100644 --- a/be/src/io/fs/azure_obj_storage_client.cpp +++ b/be/src/io/fs/azure_obj_storage_client.cpp @@ -418,9 +418,6 @@ std::string AzureObjStorageClient::generate_presigned_url(const ObjectStoragePat Azure::Storage::StorageSharedKeyCredential(conf.ak, conf.sk)); std::string endpoint = conf.endpoint; - if (doris::config::force_azure_blob_global_endpoint) { - endpoint = fmt::format("https://{}.blob.core.windows.net", conf.ak); - } auto sasURL = fmt::format(SAS_TOKEN_URL_TEMPLATE, endpoint, conf.bucket, opts.key, sasToken); if (sasURL.find("://") == std::string::npos) { sasURL = "https://" + sasURL; diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp index a87bb07a5b28e1..0a98e4c9ee77a9 100644 --- a/be/src/util/s3_util.cpp +++ b/be/src/util/s3_util.cpp @@ -313,14 +313,9 @@ std::shared_ptr S3ClientFactory::_create_azure_client( std::make_shared(s3_conf.ak, s3_conf.sk); const std::string container_name = s3_conf.bucket; - std::string uri; - if (config::force_azure_blob_global_endpoint) { - uri = fmt::format("https://{}.blob.core.windows.net/{}", s3_conf.ak, container_name); - } else { - uri = fmt::format("{}/{}", s3_conf.endpoint, container_name); - if (s3_conf.endpoint.find("://") == std::string::npos) { - uri = "https://" + uri; - } + std::string uri = fmt::format("{}/{}", s3_conf.endpoint, container_name); + if (s3_conf.endpoint.find("://") == std::string::npos) { + uri = "https://" + uri; } Azure::Storage::Blobs::BlobClientOptions options; diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 52e7e593dc6adf..46f64cdb0e6319 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -297,8 +297,6 @@ CONF_Validator(s3_client_http_scheme, [](const std::string& config) -> bool { return config == "http" || config == "https"; }); -CONF_Bool(force_azure_blob_global_endpoint, "false"); - // Max retry times for object storage request CONF_mInt64(max_s3_client_retry, "10"); diff --git a/cloud/src/recycler/s3_accessor.cpp b/cloud/src/recycler/s3_accessor.cpp index 1792a81d435f05..0f2a7776fcc7fe 100644 --- a/cloud/src/recycler/s3_accessor.cpp +++ b/cloud/src/recycler/s3_accessor.cpp @@ -372,13 +372,9 @@ int S3Accessor::init() { options.Retry.MaxRetries = config::max_s3_client_retry; auto cred = std::make_shared(conf_.ak, conf_.sk); - if (config::force_azure_blob_global_endpoint) { - uri_ = fmt::format("https://{}.blob.core.windows.net/{}", conf_.ak, conf_.bucket); - } else { - uri_ = fmt::format("{}/{}", conf_.endpoint, conf_.bucket); - if (uri_.find("://") == std::string::npos) { - uri_ = "https://" + uri_; - } + uri_ = fmt::format("{}/{}", conf_.endpoint, conf_.bucket); + if (uri_.find("://") == std::string::npos) { + uri_ = "https://" + uri_; } uri_ = normalize_http_uri(uri_); // In Azure's HTTP requests, all policies in the vector are called in a chained manner following the HTTP pipeline approach. diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index cd345b5eaadb23..4e51ca44d0ab18 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -3544,13 +3544,27 @@ public static int metaServiceRpcRetryTimes() { public static int s3_head_request_max_paths = 100; @ConfField(mutable = true, description = { - "此参数控制是否强制使用 Azure global endpoint。默认值为 false,系统将使用用户指定的 endpoint。" - + "如果设置为 true,系统将强制使用 {account}.blob.core.windows.net。", - "This parameter controls whether to force the use of the Azure global endpoint. " - + "The default is false, meaning the system will use the user-specified endpoint. " - + "If set to true, the system will force the use of {account}.blob.core.windows.net." + "指定 Azure endpoint 域名后缀白名单(包含 blob 与 dfs),多个值使用逗号分隔。" + + "默认值为 .blob.core.windows.net,.dfs.core.windows.net," + + ".blob.core.chinacloudapi.cn,.dfs.core.chinacloudapi.cn," + + ".blob.core.usgovcloudapi.net,.dfs.core.usgovcloudapi.net," + + ".blob.core.cloudapi.de,.dfs.core.cloudapi.de。", + "The host suffix whitelist for Azure endpoints (both blob and dfs), separated by commas. " + + "The default value is .blob.core.windows.net,.dfs.core.windows.net," + + ".blob.core.chinacloudapi.cn,.dfs.core.chinacloudapi.cn," + + ".blob.core.usgovcloudapi.net,.dfs.core.usgovcloudapi.net," + + ".blob.core.cloudapi.de,.dfs.core.cloudapi.de." }) - public static boolean force_azure_blob_global_endpoint = false; + public static String[] azure_blob_host_suffixes = { + ".blob.core.windows.net", + ".dfs.core.windows.net", + ".blob.core.chinacloudapi.cn", + ".dfs.core.chinacloudapi.cn", + ".blob.core.usgovcloudapi.net", + ".dfs.core.usgovcloudapi.net", + ".blob.core.cloudapi.de", + ".dfs.core.cloudapi.de" + }; @ConfField(mutable = true, description = {"指定 Jdbc driver url 白名单,举例:jdbc_driver_url_white_list=a,b,c", "the white list for jdbc driver url, if it is empty, no white list will be set" diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java index b4848aa61b6d39..a8d6280d3771cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzureProperties.java @@ -30,6 +30,8 @@ import org.apache.hadoop.conf.Configuration; import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -64,6 +66,7 @@ public class AzureProperties extends StorageProperties { @Getter @ConnectorProperty(names = {"azure.endpoint", "s3.endpoint", "AWS_ENDPOINT", "endpoint", "ENDPOINT"}, + required = false, description = "The endpoint of S3.") protected String endpoint = ""; @@ -135,8 +138,6 @@ public AzureProperties(Map origProps) { super(Type.AZURE, origProps); } - private static final String AZURE_ENDPOINT_SUFFIX = ".blob.core.windows.net"; - @Override public void initNormalizeAndCheckProps() { super.initNormalizeAndCheckProps(); @@ -160,7 +161,7 @@ public static boolean guessIsMe(Map origProps) { .findFirst() .orElse(null); if (!Strings.isNullOrEmpty(value)) { - return value.endsWith(AZURE_ENDPOINT_SUFFIX); + return AzurePropertyUtils.isAzureBlobEndpoint(value); } return false; } @@ -191,9 +192,12 @@ public Map getBackendConfigProperties() { public static final String AZURE_ENDPOINT_TEMPLATE = "https://%s.blob.core.windows.net"; - public static String formatAzureEndpoint(String endpoint, String accessKey) { - if (Config.force_azure_blob_global_endpoint) { - return String.format(AZURE_ENDPOINT_TEMPLATE, accessKey); + public static String formatAzureEndpoint(String endpoint, String accountName) { + if (Strings.isNullOrEmpty(endpoint)) { + if (Strings.isNullOrEmpty(accountName)) { + return ""; + } + return String.format(AZURE_ENDPOINT_TEMPLATE, accountName); } if (endpoint.contains("://")) { return endpoint; @@ -243,13 +247,24 @@ protected Set schemas() { } private static void setHDFSAzureAccountKeys(Configuration conf, String accountName, String accountKey) { - String[] endpoints = { - "dfs.core.windows.net", - "blob.core.windows.net" - }; + Set endpoints = new LinkedHashSet<>(); + if (Config.azure_blob_host_suffixes != null) { + for (String endpointSuffix : Config.azure_blob_host_suffixes) { + if (Strings.isNullOrEmpty(endpointSuffix)) { + continue; + } + String normalizedEndpoint = endpointSuffix.trim().toLowerCase(Locale.ROOT); + if (normalizedEndpoint.startsWith(".")) { + normalizedEndpoint = normalizedEndpoint.substring(1); + } + if (!normalizedEndpoint.isEmpty()) { + endpoints.add(normalizedEndpoint); + } + } + } for (String endpoint : endpoints) { - String key = String.format("fs.azure.account.key.%s.%s", accountName, endpoint); - conf.set(key, accountKey); + String accountKeyConfig = String.format("fs.azure.account.key.%s.%s", accountName, endpoint); + conf.set(accountKeyConfig, accountKey); } conf.set("fs.azure.account.key", accountKey); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java index 9015258070130d..e2d04057f65bbf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AzurePropertyUtils.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.property.storage; +import org.apache.doris.common.Config; import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException; @@ -24,11 +25,11 @@ import java.net.URI; import java.net.URISyntaxException; +import java.util.Locale; import java.util.Map; import java.util.regex.Pattern; public class AzurePropertyUtils { - /** * Validates and normalizes an Azure Blob Storage URI into a unified {@code s3://}-style format. *

@@ -76,6 +77,61 @@ public static String validateAndNormalizeUri(String path) throws UserException { private static final Pattern ONELAKE_PATTERN = Pattern.compile( "abfs[s]?://([^@]+)@([^/]+)\\.dfs\\.fabric\\.microsoft\\.com(/.*)?", Pattern.CASE_INSENSITIVE); + public static boolean isAzureBlobEndpoint(String endpointOrHost) { + String host = extractHost(endpointOrHost); + if (StringUtils.isBlank(host)) { + return false; + } + String normalizedHost = host.toLowerCase(Locale.ROOT); + return matchesAnySuffix(normalizedHost, Config.azure_blob_host_suffixes); + } + + private static boolean matchesAnySuffix(String normalizedHost, String[] suffixes) { + if (suffixes == null || suffixes.length == 0) { + return false; + } + for (String suffix : suffixes) { + if (matchesSuffix(normalizedHost, suffix)) { + return true; + } + } + return false; + } + + private static boolean matchesSuffix(String normalizedHost, String suffix) { + if (StringUtils.isBlank(suffix)) { + return false; + } + String normalizedSuffix = suffix.trim().toLowerCase(Locale.ROOT); + if (!normalizedSuffix.startsWith(".")) { + normalizedSuffix = "." + normalizedSuffix; + } + return normalizedHost.endsWith(normalizedSuffix); + } + + private static String extractHost(String endpointOrHost) { + if (StringUtils.isBlank(endpointOrHost)) { + return null; + } + String normalized = endpointOrHost.trim(); + if (normalized.contains("://")) { + try { + return new URI(normalized).getHost(); + } catch (URISyntaxException e) { + return null; + } + } + int slashIndex = normalized.indexOf('/'); + if (slashIndex >= 0) { + normalized = normalized.substring(0, slashIndex); + } + int colonIndex = normalized.indexOf(':'); + if (colonIndex >= 0) { + normalized = normalized.substring(0, colonIndex); + } + return normalized; + } + /** * Converts an Azure Blob Storage URI into a unified {@code s3:///} format. @@ -137,11 +193,6 @@ private static String convertToS3Style(String uri) { throw new StoragePropertiesException("Invalid Azure HTTPS URI, missing host: " + uri); } - // Typical Azure Blob domain: .blob.core.windows.net - if (!host.contains(".blob.core.windows.net")) { - throw new StoragePropertiesException("Not an Azure Blob URL: " + uri); - } - // Path usually looks like: // String[] parts = path.split("/", 3); if (parts.length < 2) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java index 8171f33db409ef..073d9f361b787d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertiesTest.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.property.storage; +import org.apache.doris.common.Config; import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException; @@ -213,4 +214,81 @@ public void testOneLake() throws UserException { Assertions.assertEquals("https://login.microsoftonline.com/72f988bf-5289-5289-5289-2d7cd011db47/oauth2/token", hadoopStorageConfig.get("fs.azure.account.oauth2.client.endpoint.onelake.dfs.fabric.microsoft.com")); } + + @Test + public void testGuessIsMeChinaEndpoint() { + origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); + origProps.put("s3.endpoint", "mystorageaccount.blob.core.chinacloudapi.cn"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); + } + + @Test + public void testGuessIsMeByProviderWhenEndpointIsUnknown() { + origProps.put("s3.endpoint", "https://mystorageaccount.invalid.test"); + Assertions.assertFalse(AzureProperties.guessIsMe(origProps)); + origProps.put("provider", "azure"); + Assertions.assertTrue(AzureProperties.guessIsMe(origProps)); + } + + @Test + public void testFormatAzureEndpointUsesInputOrDefault() { + Assertions.assertEquals("https://mystorageaccount.blob.core.chinacloudapi.cn", + AzureProperties.formatAzureEndpoint("mystorageaccount.blob.core.chinacloudapi.cn", + "mystorageaccount")); + Assertions.assertEquals("https://mystorageaccount.blob.core.chinacloudapi.cn", + AzureProperties.formatAzureEndpoint("https://mystorageaccount.blob.core.chinacloudapi.cn", + "mystorageaccount")); + Assertions.assertEquals("https://mystorageaccount.blob.core.windows.net", + AzureProperties.formatAzureEndpoint("", "mystorageaccount")); + Assertions.assertEquals("", AzureProperties.formatAzureEndpoint("", "")); + } + + @Test + public void testDefaultEndpointWhenEndpointNotSet() throws UserException { + origProps.put("s3.access_key", "myAzureAccessKey"); + origProps.put("s3.secret_key", "myAzureSecretKey"); + origProps.put("provider", "azure"); + + AzureProperties azureProperties = (AzureProperties) StorageProperties.createPrimary(origProps); + Assertions.assertEquals("https://myAzureAccessKey.blob.core.windows.net", azureProperties.getEndpoint()); + } + + @Test + public void testHadoopStorageConfigContainsChinaCloudAccountKeys() throws UserException { + origProps.put("s3.endpoint", "https://mystorageaccount.blob.core.chinacloudapi.cn"); + origProps.put("s3.access_key", "myAzureAccessKey"); + origProps.put("s3.secret_key", "myAzureSecretKey"); + origProps.put("provider", "azure"); + + AzureProperties azureProperties = (AzureProperties) StorageProperties.createPrimary(origProps); + Configuration hadoopStorageConfig = azureProperties.getHadoopStorageConfig(); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.blob.core.chinacloudapi.cn")); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.dfs.core.chinacloudapi.cn")); + } + + @Test + public void testHadoopStorageConfigContainsCustomAccountKeyEndpointsFromConfig() throws UserException { + String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; + try { + Config.azure_blob_host_suffixes = new String[] {"blob.custom.test", ".dfs.custom.test", " "}; + origProps.put("s3.endpoint", "https://mystorageaccount.blob.custom.test"); + origProps.put("s3.access_key", "myAzureAccessKey"); + origProps.put("s3.secret_key", "myAzureSecretKey"); + origProps.put("provider", "azure"); + + AzureProperties azureProperties = (AzureProperties) StorageProperties.createPrimary(origProps); + Configuration hadoopStorageConfig = azureProperties.getHadoopStorageConfig(); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.blob.custom.test")); + Assertions.assertEquals("myAzureSecretKey", + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.dfs.custom.test")); + Assertions.assertNull( + hadoopStorageConfig.get("fs.azure.account.key.myAzureAccessKey.blob.core.windows.net")); + } finally { + Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java index cbc584d9a10b0f..1965284c3d1ba7 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/AzurePropertyUtilsTest.java @@ -17,6 +17,8 @@ package org.apache.doris.datasource.property.storage; +import org.apache.doris.common.Config; +import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException; import org.junit.jupiter.api.Assertions; @@ -87,6 +89,13 @@ public void testHttpsUriWithPath() throws Exception { Assertions.assertEquals(expected, AzurePropertyUtils.validateAndNormalizeUri(input)); } + @Test + public void testHttpsChinaUriWithPath() throws Exception { + String input = "https://account.blob.core.chinacloudapi.cn/container/data/file.parquet"; + String expected = "s3://container/data/file.parquet"; + Assertions.assertEquals(expected, AzurePropertyUtils.validateAndNormalizeUri(input)); + } + @Test public void testInvalidAzureScheme() { String input = "ftp://container@account.blob.core.windows.net/data/file.txt"; @@ -109,9 +118,9 @@ public void testHttpsUriMissingHost() { } @Test - public void testHttpsUriNotAzureBlob() { + public void testHttpsUriNotAzureBlob() throws UserException { String input = "https://account.otherdomain.com/container/file.txt"; - Assertions.assertThrows(StoragePropertiesException.class, () -> + Assertions.assertEquals("s3://container/file.txt", AzurePropertyUtils.validateAndNormalizeUri(input)); } @@ -121,6 +130,39 @@ public void testBlankUri() { AzurePropertyUtils.validateAndNormalizeUri(" ")); } + @Test + public void testIsAzureBlobEndpoint() { + String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; + try { + Config.azure_blob_host_suffixes = new String[] { + ".blob.core.windows.net", + ".dfs.core.windows.net", + ".blob.core.chinacloudapi.cn", + ".dfs.core.chinacloudapi.cn" + }; + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("https://account.blob.core.windows.net")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.core.chinacloudapi.cn")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.core.chinacloudapi.cn")); + Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("https://account.otherdomain.com")); + } finally { + Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; + } + } + + @Test + public void testIsAzureBlobEndpointFromConfig() { + String[] originalAzureBlobHostSuffixes = Config.azure_blob_host_suffixes; + try { + Config.azure_blob_host_suffixes = new String[] {"blob.custom.test", "dfs.custom.test", " .blob.extra.test "}; + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("https://account.blob.custom.test")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.dfs.custom.test")); + Assertions.assertTrue(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.extra.test")); + Assertions.assertFalse(AzurePropertyUtils.isAzureBlobEndpoint("account.blob.unknown.test")); + } finally { + Config.azure_blob_host_suffixes = originalAzureBlobHostSuffixes; + } + } + // ---------- validateAndGetUri Tests ---------- @Test