Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 10e6497

Browse files
authored
Merge branch 'main' into sycai_ai_gen_bool
2 parents b2446ab + 913de1b commit 10e6497

12 files changed

Lines changed: 239 additions & 20 deletions

File tree

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,21 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.19.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.18.0...v2.19.0) (2025-09-09)
8+
9+
10+
### Features
11+
12+
* Add str.join method ([#2054](https://github.com/googleapis/python-bigquery-dataframes/issues/2054)) ([8804ada](https://github.com/googleapis/python-bigquery-dataframes/commit/8804adaf8ba23fdcad6e42a7bf034bd0a11c890f))
13+
* Support display.max_colwidth option ([#2053](https://github.com/googleapis/python-bigquery-dataframes/issues/2053)) ([5229e07](https://github.com/googleapis/python-bigquery-dataframes/commit/5229e07b4535c01b0cdbd731455ff225a373b5c8))
14+
* Support VPC egress setting in remote function ([#2059](https://github.com/googleapis/python-bigquery-dataframes/issues/2059)) ([5df779d](https://github.com/googleapis/python-bigquery-dataframes/commit/5df779d4f421d3ba777cfd928d99ca2e8a3f79ad))
15+
16+
17+
### Bug Fixes
18+
19+
* Fix issue mishandling chunked array while loading data ([#2051](https://github.com/googleapis/python-bigquery-dataframes/issues/2051)) ([873d0ee](https://github.com/googleapis/python-bigquery-dataframes/commit/873d0eee474ed34f1d5164c37383f2737dbec4db))
20+
* Remove warning for slot_millis_sum ([#2047](https://github.com/googleapis/python-bigquery-dataframes/issues/2047)) ([425a691](https://github.com/googleapis/python-bigquery-dataframes/commit/425a6917d5442eeb4df486c6eed1fd136bbcedfb))
21+
722
## [2.18.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.17.0...v2.18.0) (2025-09-03)
823

924

bigframes/_config/auth.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import threading
18+
from typing import Optional
19+
20+
import google.auth.credentials
21+
import google.auth.transport.requests
22+
import pydata_google_auth
23+
24+
_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
25+
26+
# Put the lock here rather than in BigQueryOptions so that BigQueryOptions
27+
# remains deepcopy-able.
28+
_AUTH_LOCK = threading.Lock()
29+
_cached_credentials: Optional[google.auth.credentials.Credentials] = None
30+
_cached_project_default: Optional[str] = None
31+
32+
33+
def get_default_credentials_with_project() -> tuple[
    google.auth.credentials.Credentials, Optional[str]
]:
    """Return the cached default credentials and project, fetching them once.

    The first successful call obtains credentials through
    ``pydata_google_auth.default`` using ``_SCOPES``, refreshes them so an
    access token is available, and stores the result in module-level
    variables. Later calls return the stored values until
    ``reset_default_credentials_and_project`` clears them.

    Returns:
        tuple[google.auth.credentials.Credentials, Optional[str]]:
            The credentials and the default project reported alongside
            them (which may be None).

    Raises:
        Whatever ``pydata_google_auth.default`` or ``Credentials.refresh``
        raise. In that case nothing is cached, so the next call retries
        the whole flow instead of returning un-refreshed credentials.
    """
    # Only the cache variables are rebound here; _AUTH_LOCK is merely read,
    # so it does not need a ``global`` declaration.
    global _cached_credentials, _cached_project_default

    with _AUTH_LOCK:
        if _cached_credentials is not None:
            return _cached_credentials, _cached_project_default

        credentials, project = pydata_google_auth.default(
            scopes=_SCOPES, use_local_webserver=False
        )

        # Ensure an access token is available.
        credentials.refresh(google.auth.transport.requests.Request())

        # Publish to the cache only after the refresh succeeded. Assigning
        # earlier would leave un-refreshed credentials cached if refresh()
        # raised, and every later call would return them without retrying.
        _cached_credentials = credentials
        _cached_project_default = project

        return credentials, project
50+
51+
52+
def reset_default_credentials_and_project():
    """Forget the cached default credentials and project.

    Both module-level cache variables are set back to None while holding
    ``_AUTH_LOCK``.
    """
    global _AUTH_LOCK, _cached_credentials, _cached_project_default

    with _AUTH_LOCK:
        _cached_credentials = _cached_project_default = None

bigframes/_config/bigquery_options.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import google.auth.credentials
2323
import requests.adapters
2424

25+
import bigframes._config.auth
2526
import bigframes._importing
2627
import bigframes.enums
2728
import bigframes.exceptions as bfe
@@ -37,6 +38,7 @@
3738

3839
def _get_validated_location(value: Optional[str]) -> Optional[str]:
3940
import bigframes._tools.strings
41+
import bigframes.constants
4042

4143
if value is None or value in bigframes.constants.ALL_BIGQUERY_LOCATIONS:
4244
return value
@@ -141,20 +143,52 @@ def application_name(self, value: Optional[str]):
141143
)
142144
self._application_name = value
143145

146+
def _try_set_default_credentials_and_project(
    self,
) -> tuple[google.auth.credentials.Credentials, Optional[str]]:
    """Populate credentials (and, if unset, project) from the defaults.

    Returns:
        tuple[google.auth.credentials.Credentials, Optional[str]]:
            The credentials and project stored on this options object
            after the call.
    """
    # Only authenticate when no credentials are stored yet. Once they are
    # set we have already authenticated; a user who wants to re-auth is
    # expected to reset the credentials explicitly.
    if self._credentials is None:
        (
            default_credentials,
            default_project,
        ) = bigframes._config.auth.get_default_credentials_with_project()
        self._credentials = default_credentials

        # An explicitly configured project always wins over the default
        # one that came with the credentials.
        if self._project is None:
            self._project = default_project

    return self._credentials, self._project
166+
144167
@property
def credentials(self) -> google.auth.credentials.Credentials:
    """The OAuth2 credentials to use for this client.

    If no credentials have been set, reading this property fetches the
    default credentials and stores them on this object; when no project
    has been set either, the default project is stored as well.

    Set to None to force re-authentication.

    Returns:
        google.auth.credentials.Credentials:
            The explicitly set credentials if present; otherwise the
            default credentials obtained on first access.
    """
    # The helper returns the stored credentials untouched when they are
    # already set, so no separate pre-check is needed here.
    credentials, _ = self._try_set_default_credentials_and_project()
    return credentials
153182

154183
@credentials.setter
def credentials(self, value: Optional[google.auth.credentials.Credentials]):
    # Swapping in a different credentials object is rejected once the
    # session has started; re-assigning the very same object is allowed.
    is_different_object = self._credentials is not value
    if is_different_object and self._session_started:
        raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="credentials"))

    # The user has _explicitly_ asked that we re-authenticate, so the
    # cached default credentials and project are dropped as well.
    if value is None:
        bigframes._config.auth.reset_default_credentials_and_project()

    self._credentials = value
159193

160194
@property
@@ -183,7 +217,11 @@ def project(self) -> Optional[str]:
183217
None or str:
184218
Google Cloud project ID as a string; otherwise None.
185219
"""
186-
return self._project
220+
if self._project:
221+
return self._project
222+
223+
_, project = self._try_set_default_credentials_and_project()
224+
return project
187225

188226
@project.setter
189227
def project(self, value: Optional[str]):

bigframes/functions/_function_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@
5151
}
5252
)
5353

54+
# https://cloud.google.com/functions/docs/reference/rest/v2/projects.locations.functions#vpconnectoregresssettings
55+
_VPC_EGRESS_SETTINGS_MAP = types.MappingProxyType(
56+
{
57+
"all": functions_v2.ServiceConfig.VpcConnectorEgressSettings.ALL_TRAFFIC,
58+
"private-ranges-only": functions_v2.ServiceConfig.VpcConnectorEgressSettings.PRIVATE_RANGES_ONLY,
59+
"unspecified": functions_v2.ServiceConfig.VpcConnectorEgressSettings.VPC_CONNECTOR_EGRESS_SETTINGS_UNSPECIFIED,
60+
}
61+
)
62+
5463
# BQ managed functions (@udf) currently only support Python 3.11.
5564
_MANAGED_FUNC_PYTHON_VERSION = "python-3.11"
5665

@@ -375,6 +384,7 @@ def create_cloud_function(
375384
max_instance_count=None,
376385
is_row_processor=False,
377386
vpc_connector=None,
387+
vpc_connector_egress_settings="private-ranges-only",
378388
memory_mib=1024,
379389
ingress_settings="internal-only",
380390
):
@@ -472,6 +482,15 @@ def create_cloud_function(
472482
function.service_config.max_instance_count = max_instance_count
473483
if vpc_connector is not None:
474484
function.service_config.vpc_connector = vpc_connector
485+
if vpc_connector_egress_settings not in _VPC_EGRESS_SETTINGS_MAP:
486+
raise bf_formatting.create_exception_with_feedback_link(
487+
ValueError,
488+
f"'{vpc_connector_egress_settings}' not one of the supported vpc egress settings values: {list(_VPC_EGRESS_SETTINGS_MAP)}",
489+
)
490+
function.service_config.vpc_connector_egress_settings = cast(
491+
functions_v2.ServiceConfig.VpcConnectorEgressSettings,
492+
_VPC_EGRESS_SETTINGS_MAP[vpc_connector_egress_settings],
493+
)
475494
function.service_config.service_account_email = (
476495
self._cloud_function_service_account
477496
)
@@ -532,6 +551,7 @@ def provision_bq_remote_function(
532551
cloud_function_max_instance_count,
533552
is_row_processor,
534553
cloud_function_vpc_connector,
554+
cloud_function_vpc_connector_egress_settings,
535555
cloud_function_memory_mib,
536556
cloud_function_ingress_settings,
537557
bq_metadata,
@@ -580,6 +600,7 @@ def provision_bq_remote_function(
580600
max_instance_count=cloud_function_max_instance_count,
581601
is_row_processor=is_row_processor,
582602
vpc_connector=cloud_function_vpc_connector,
603+
vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings,
583604
memory_mib=cloud_function_memory_mib,
584605
ingress_settings=cloud_function_ingress_settings,
585606
)

bigframes/functions/_function_session.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ def remote_function(
245245
cloud_function_timeout: Optional[int] = 600,
246246
cloud_function_max_instances: Optional[int] = None,
247247
cloud_function_vpc_connector: Optional[str] = None,
248+
cloud_function_vpc_connector_egress_settings: Literal[
249+
"all", "private-ranges-only", "unspecified"
250+
] = "private-ranges-only",
248251
cloud_function_memory_mib: Optional[int] = 1024,
249252
cloud_function_ingress_settings: Literal[
250253
"all", "internal-only", "internal-and-gclb"
@@ -425,6 +428,13 @@ def remote_function(
425428
function. This is useful if your code needs access to data or
426429
service(s) that are on a VPC network. See for more details
427430
https://cloud.google.com/functions/docs/networking/connecting-vpc.
431+
cloud_function_vpc_connector_egress_settings (str, Optional):
432+
Egress settings for the VPC connector, controlling what outbound
433+
traffic is routed through the VPC connector.
434+
Options are: `all`, `private-ranges-only`, or `unspecified`.
435+
If not specified, `private-ranges-only` is used by default.
436+
See for more details
437+
https://cloud.google.com/run/docs/configuring/vpc-connectors#egress-job.
428438
cloud_function_memory_mib (int, Optional):
429439
The amounts of memory (in mebibytes) to allocate for the cloud
430440
function (2nd gen) created. This also dictates a corresponding
@@ -616,6 +626,7 @@ def wrapper(func):
616626
cloud_function_max_instance_count=cloud_function_max_instances,
617627
is_row_processor=is_row_processor,
618628
cloud_function_vpc_connector=cloud_function_vpc_connector,
629+
cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings,
619630
cloud_function_memory_mib=cloud_function_memory_mib,
620631
cloud_function_ingress_settings=cloud_function_ingress_settings,
621632
bq_metadata=bqrf_metadata,

bigframes/pandas/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ def remote_function(
8787
cloud_function_timeout: Optional[int] = 600,
8888
cloud_function_max_instances: Optional[int] = None,
8989
cloud_function_vpc_connector: Optional[str] = None,
90+
cloud_function_vpc_connector_egress_settings: Literal[
91+
"all", "private-ranges-only", "unspecified"
92+
] = "private-ranges-only",
9093
cloud_function_memory_mib: Optional[int] = 1024,
9194
cloud_function_ingress_settings: Literal[
9295
"all", "internal-only", "internal-and-gclb"
@@ -109,6 +112,7 @@ def remote_function(
109112
cloud_function_timeout=cloud_function_timeout,
110113
cloud_function_max_instances=cloud_function_max_instances,
111114
cloud_function_vpc_connector=cloud_function_vpc_connector,
115+
cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings,
112116
cloud_function_memory_mib=cloud_function_memory_mib,
113117
cloud_function_ingress_settings=cloud_function_ingress_settings,
114118
cloud_build_service_account=cloud_build_service_account,

bigframes/session/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
import bigframes_vendored.pandas.io.parsers.readers as third_party_pandas_readers
5050
import bigframes_vendored.pandas.io.pickle as third_party_pandas_pickle
5151
import google.cloud.bigquery as bigquery
52-
import google.cloud.storage as storage # type: ignore
5352
import numpy as np
5453
import pandas
5554
from pandas._typing import (
@@ -1424,7 +1423,7 @@ def _check_file_size(self, filepath: str):
14241423
if filepath.startswith("gs://"): # GCS file path
14251424
bucket_name, blob_path = filepath.split("/", 3)[2:]
14261425

1427-
client = storage.Client()
1426+
client = self._clients_provider.storageclient
14281427
bucket = client.bucket(bucket_name)
14291428

14301429
list_blobs_params = inspect.signature(bucket.list_blobs).parameters
@@ -1510,6 +1509,9 @@ def remote_function(
15101509
cloud_function_timeout: Optional[int] = 600,
15111510
cloud_function_max_instances: Optional[int] = None,
15121511
cloud_function_vpc_connector: Optional[str] = None,
1512+
cloud_function_vpc_connector_egress_settings: Literal[
1513+
"all", "private-ranges-only", "unspecified"
1514+
] = "private-ranges-only",
15131515
cloud_function_memory_mib: Optional[int] = 1024,
15141516
cloud_function_ingress_settings: Literal[
15151517
"all", "internal-only", "internal-and-gclb"
@@ -1675,6 +1677,13 @@ def remote_function(
16751677
function. This is useful if your code needs access to data or
16761678
service(s) that are on a VPC network. See for more details
16771679
https://cloud.google.com/functions/docs/networking/connecting-vpc.
1680+
cloud_function_vpc_connector_egress_settings (str, Optional):
1681+
Egress settings for the VPC connector, controlling what outbound
1682+
traffic is routed through the VPC connector.
1683+
Options are: `all`, `private-ranges-only`, or `unspecified`.
1684+
If not specified, `private-ranges-only` is used by default.
1685+
See for more details
1686+
https://cloud.google.com/run/docs/configuring/vpc-connectors#egress-job.
16781687
cloud_function_memory_mib (int, Optional):
16791688
The amounts of memory (in mebibytes) to allocate for the cloud
16801689
function (2nd gen) created. This also dictates a corresponding
@@ -1732,6 +1741,7 @@ def remote_function(
17321741
cloud_function_timeout=cloud_function_timeout,
17331742
cloud_function_max_instances=cloud_function_max_instances,
17341743
cloud_function_vpc_connector=cloud_function_vpc_connector,
1744+
cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings,
17351745
cloud_function_memory_mib=cloud_function_memory_mib,
17361746
cloud_function_ingress_settings=cloud_function_ingress_settings,
17371747
cloud_build_service_account=cloud_build_service_account,

bigframes/session/clients.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,17 @@
2929
import google.cloud.bigquery_storage_v1
3030
import google.cloud.functions_v2
3131
import google.cloud.resourcemanager_v3
32-
import pydata_google_auth
32+
import google.cloud.storage # type: ignore
3333
import requests
3434

35+
import bigframes._config
3536
import bigframes.constants
3637
import bigframes.version
3738

3839
from . import environment
3940

4041
_ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT"
4142
_APPLICATION_NAME = f"bigframes/{bigframes.version.__version__} ibis/9.2.0"
42-
_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
4343

4444

4545
# BigQuery is a REST API, which requires the protocol as part of the URL.
@@ -50,10 +50,6 @@
5050
_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com"
5151

5252

53-
def _get_default_credentials_with_project():
54-
return pydata_google_auth.default(scopes=_SCOPES, use_local_webserver=False)
55-
56-
5753
def _get_application_names():
5854
apps = [_APPLICATION_NAME]
5955

@@ -88,10 +84,8 @@ def __init__(
8884
):
8985
credentials_project = None
9086
if credentials is None:
91-
credentials, credentials_project = _get_default_credentials_with_project()
92-
93-
# Ensure an access token is available.
94-
credentials.refresh(google.auth.transport.requests.Request())
87+
credentials = bigframes._config.options.bigquery.credentials
88+
credentials_project = bigframes._config.options.bigquery.project
9589

9690
# Prefer the project in this order:
9791
# 1. Project explicitly specified by the user
@@ -165,6 +159,9 @@ def __init__(
165159
google.cloud.resourcemanager_v3.ProjectsClient
166160
] = None
167161

162+
self._storageclient_lock = threading.Lock()
163+
self._storageclient: Optional[google.cloud.storage.Client] = None
164+
168165
def _create_bigquery_client(self):
169166
bq_options = None
170167
if "bqclient" in self._client_endpoints_override:
@@ -347,3 +344,17 @@ def resourcemanagerclient(self):
347344
)
348345

349346
return self._resourcemanagerclient
347+
348+
@property
def storageclient(self):
    """Cloud Storage client for this provider, created on first access.

    Creation is guarded by ``self._storageclient_lock``; once built, the
    same client instance is returned on every later access.
    """
    with self._storageclient_lock:
        if self._storageclient:
            return self._storageclient

        # Tag requests with this provider's application name.
        # NOTE(review): no explicit project is passed, so the client
        # presumably falls back to its own default project resolution;
        # confirm that matches the project used by the other clients.
        self._storageclient = google.cloud.storage.Client(
            client_info=google.api_core.client_info.ClientInfo(
                user_agent=self._application_name
            ),
            credentials=self._credentials,
        )
        return self._storageclient

0 commit comments

Comments
 (0)