From 25272a67cf897aaaa840d1ec8a8b2df5297f9012 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 13 Feb 2026 05:28:12 +0000 Subject: [PATCH 1/8] docs: use public apis for image processing --- .../multimodal/multimodal_dataframe.ipynb | 925 +++++------------- 1 file changed, 222 insertions(+), 703 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 3d3e2d0a6f..d36e3e9baf 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -180,20 +180,7 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", - " _global_session = bigframes.session.connect(\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "# Create blob columns from wildcard path.\n", "df_image = 
bpd.from_glob_path(\n", @@ -209,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -223,10 +210,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -256,23 +245,23 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " \n", " \n", "\n", @@ -281,16 +270,16 @@ ], "text/plain": [ " image\n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -330,35 +319,39 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + 
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", "version. Use `json_query` instead.\n", " warnings.warn(bfe.format_message(msg), category=UserWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", "version. Use `json_query` instead.\n", " warnings.warn(bfe.format_message(msg), category=UserWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", "version. Use `json_query` instead.\n", " warnings.warn(bfe.format_message(msg), category=UserWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -392,7 +385,7 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", @@ -400,7 +393,7 @@ " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", @@ -408,7 +401,7 @@ " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", @@ -416,7 +409,7 @@ " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", @@ -424,7 +417,7 @@ " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", @@ -437,11 +430,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-13T04:5... 
bob image/png \n", "\n", " size updated \n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -453,7 +446,7 @@ "[5 rows x 5 columns]" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -478,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -492,19 +485,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:121: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", "version. Use `json_query` instead.\n", " warnings.warn(bfe.format_message(msg), category=UserWarning)\n" ] @@ -512,7 +501,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -524,7 +513,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -546,12 +535,12 @@ }, "source": [ "### 3. Conduct image transformations\n", - "BigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. The output can be saved to GCS folders or to BQ as bytes." + "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." 
] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -564,46 +553,144 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and 
pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] } ], "source": [ - "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n", - " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n", + "# Construct the canonical connection ID\n", + "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", + "\n", + "@bpd.udf(\n", + " input_types=[str, str, str, int, int, bool],\n", + " output_type=str,\n", + " dataset=DATASET_ID,\n", + " name=\"image_blur\",\n", + " bigquery_connection=FULL_CONNECTION_ID,\n", + " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "df_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n", - " (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n", + "def image_blur(src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool) -> str:\n", + " import json\n", + " import cv2 as cv\n", + " import numpy as np\n", + " import requests\n", + " from requests import adapters\n", + " try:\n", + " session = requests.Session()\n", + " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", + " src_obj, dst_obj = json.loads(src_rt), json.loads(dst_rt)\n", + " src_url, dst_url = src_obj[\"access_urls\"][\"read_url\"], dst_obj[\"access_urls\"][\"write_url\"]\n", + " response = session.get(src_url, timeout=30)\n", + " response.raise_for_status()\n", + " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " kx, ky = int(kx), int(ky)\n", + " img_blurred = cv.blur(img, (kx, ky))\n", + " ext = ext.lower()\n", + " success, encoded = cv.imencode(ext, img_blurred)\n", + " if not success:\n", + " 
raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", + " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/\" + ext.replace(\".\", \"\")}, timeout=30).raise_for_status()\n", + " return json.dumps({\"status\": \"\", \"content\": dst_rt}) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", + " except Exception as e:\n", + " if verbose: return json.dumps({\"status\": str(e), \"content\": \"\"})\n", + " raise e\n", + "\n", + "@bpd.udf(\n", + " input_types=[str, str, str, int, int, float, float, bool],\n", + " output_type=str,\n", + " dataset=DATASET_ID,\n", + " name=\"image_resize\",\n", + " bigquery_connection=FULL_CONNECTION_ID,\n", + " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", + ")\n", + "def image_resize(src_rt: str, dst_rt: str, ext: str, dx: int, dy: int, fx: float, fy: float, verbose: bool) -> str:\n", + " import json\n", + " import cv2 as cv\n", + " import numpy as np\n", + " import requests\n", + " from requests import adapters\n", + " try:\n", + " session = requests.Session()\n", + " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", + " src_obj, dst_obj = json.loads(src_rt), json.loads(dst_rt)\n", + " src_url, dst_url = src_obj[\"access_urls\"][\"read_url\"], dst_obj[\"access_urls\"][\"write_url\"]\n", + " response = session.get(src_url, timeout=30)\n", + " response.raise_for_status()\n", + " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " img_resized = cv.resize(img, dsize=(dx, dy), fx=fx, fy=fy)\n", + " success, encoded = cv.imencode(ext, img_resized)\n", + " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/\" + ext.replace(\".\", \"\")}, timeout=30).raise_for_status()\n", + " return json.dumps({\"status\": \"\", \"content\": dst_rt}) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", + " except Exception as e:\n", + " if verbose: return json.dumps({\"status\": str(e), \"content\": \"\"})\n", + " raise 
e\n", + "\n", + "@bpd.udf(\n", + " input_types=[str, str, str, float, float, str, bool],\n", + " output_type=str,\n", + " dataset=DATASET_ID,\n", + " name=\"image_normalize\",\n", + " bigquery_connection=FULL_CONNECTION_ID,\n", + " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "df_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n", - " alpha=50.0,\n", - " beta=150.0,\n", - " norm_type=\"minmax\",\n", - " dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n", - " engine=\"opencv\",\n", - ")" + "def image_normalize(src_rt: str, dst_rt: str, ext: str, alpha: float, beta: float, norm_type: str, verbose: bool) -> str:\n", + " import json\n", + " import cv2 as cv\n", + " import numpy as np\n", + " import requests\n", + " from requests import adapters\n", + " try:\n", + " session = requests.Session()\n", + " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", + " src_obj, dst_obj = json.loads(src_rt), json.loads(dst_rt)\n", + " src_url, dst_url = src_obj[\"access_urls\"][\"read_url\"], dst_obj[\"access_urls\"][\"write_url\"]\n", + " response = session.get(src_url, timeout=30)\n", + " response.raise_for_status()\n", + " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " norm_map = {\"inf\": cv.NORM_INF, \"l1\": cv.NORM_L1, \"l2\": cv.NORM_L2, \"minmax\": cv.NORM_MINMAX}\n", + " img_normalized = cv.normalize(img, None, alpha=alpha, beta=beta, norm_type=norm_map[norm_type])\n", + " success, encoded = cv.imencode(ext, img_normalized)\n", + " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/\" + ext.replace(\".\", \"\")}, timeout=30).raise_for_status()\n", + " return json.dumps({\"status\": \"\", \"content\": dst_rt}) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", + " except Exception as e:\n", + " if verbose: return json.dumps({\"status\": str(e), \"content\": \"\"})\n", + " raise e\n", + "\n", + "def apply_transformation(series, 
dst_folder, udf, *args, verbose=False):\n", + " import os\n", + " dst_folder = os.path.join(dst_folder, \"\")\n", + " dst_uri = series.blob.uri().str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", + " dst_blob = dst_uri.str.to_blob(connection=FULL_CONNECTION_ID)\n", + " df_transform = bpd.DataFrame({\n", + " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", + " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", + " \"ext\": dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n", + " })\n", + " res = df_transform.apply(udf, axis=1, args=(*args, verbose))\n", + " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", + "\n", + "# Apply transformations\n", + "df_image[\"blurred\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", image_blur, 20, 20)\n", + "df_image[\"resized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)\n", + "df_image[\"normalized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\", image_normalize, 50.0, 150.0, \"minmax\")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -616,133 +703,58 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use 
bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] } ], "source": [ "# You can also chain functions together\n", - "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", engine=\"opencv\")" + "df_image[\"blur_resized\"] = apply_transformation(df_image[\"blurred\"], f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Using `verbose` mode for detailed output\\n\n", - "\\n\n", - "All multimodal functions support a `verbose` parameter, which defaults to `False`.\\n\n", - "\\n\n", + "### Using `verbose` mode for detailed output\n", + "\n", + "All multimodal functions support a `verbose` parameter, which defaults to `False`.\n", + "\n", "* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\\n\n", - "* When `verbose=True`, the function returns a `STRUCT` containing two fields:\\n\n", - " * `content`: The main result of the operation.\\n\n", - " * `status`: An informational field. If the operation is successful, this will be empty. 
If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\\n\n", - "\\n\n", + "* When `verbose=True`, the function returns a `STRUCT` containing two fields:\n", + " * `content`: The main result of the operation.\n", + " * `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n", + "\n", "Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function." ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and 
pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
blurred_verbose
0{'status': '', 'content': {'uri': 'gs://bigfra...
1{'status': '', 'content': {'uri': 'gs://bigfra...
2{'status': '', 'content': {'uri': 'gs://bigfra...
3{'status': '', 'content': {'uri': 'gs://bigfra...
4{'status': '', 'content': {'uri': 'gs://bigfra...
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " blurred_verbose\n", - "0 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "1 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "2 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "3 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "4 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "df_image[\"blurred_verbose\"] = df_image[\"image\"].blob.image_blur(\n", - " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/\", engine=\"opencv\", verbose=True\n", - ")\n", + "df_image[\"blurred_verbose\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/\", image_blur, 20, 20, verbose=True)\n", "df_image[[\"blurred_verbose\"]]" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -751,195 +763,7 @@ "id": "6NGK6GYSU44B", "outputId": "859101c1-2ee4-4f9a-e250-e8947127420a" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and 
pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdatedblurredresizednormalizedblur_resizedblurred_verbose
0aliceimage/png15912402025-03-20 17:45:04+00:00{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}
1bobimage/png11829512025-03-20 17:45:02+00:00{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}
2bobimage/png15208842025-03-20 17:44:55+00:00{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}
3aliceimage/png12354012025-03-20 17:45:19+00:00{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}
4bobimage/png15919232025-03-20 17:44:47+00:00{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}
\n", - "

5 rows × 10 columns

\n", - "
[5 rows x 10 columns in total]" - ], - "text/plain": [ - " image author content_type \\\n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "\n", - " size updated \\\n", - "0 1591240 2025-03-20 17:45:04+00:00 \n", - "1 1182951 2025-03-20 17:45:02+00:00 \n", - "2 1520884 2025-03-20 17:44:55+00:00 \n", - "3 1235401 2025-03-20 17:45:19+00:00 \n", - "4 1591923 2025-03-20 17:44:47+00:00 \n", - "\n", - " blurred \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "\n", - " resized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "\n", - " normalized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "\n", - " blur_resized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_blur_r... 
\n", - "4 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "\n", - " blurred_verbose \n", - "0 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "1 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "2 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "3 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "4 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "\n", - "[5 rows x 10 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_image" ] @@ -955,22 +779,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "id": "mRUGfcaFVW-3" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. 
Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n" - ] - } - ], + "outputs": [], "source": [ "from bigframes.ml import llm\n", "gemini = llm.GeminiTextGenerator()" @@ -978,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -987,87 +800,7 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as 
pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a tin of K9 Guard dog paw balm.
1The item is K9 Guard Dog Hot Spot Spray.
\n", - "

2 rows × 2 columns

\n", - "
[2 rows x 2 columns in total]" - ], - "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9 Guard dog paw balm. \n", - "1 The item is K9 Guard Dog Hot Spot Spray. \n", - "\n", - " image \n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "\n", - "[2 rows x 2 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Ask the same question on the images\n", "df_image = df_image.head(2)\n", @@ -1077,22 +810,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "id": "IG3J3HsKhyBY" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" @@ -1100,7 +822,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1109,87 +831,7 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - 
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a tin of K9Guard Dog Paw Balm.
1The bottle is mostly white, with a light blue accents. The background is a light gray. There are also black and green elements on the bottle's label.
\n", - "

2 rows × 2 columns

\n", - "
[2 rows x 2 columns in total]" - ], - "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9Guard Dog Paw Balm. \n", - "1 The bottle is mostly white, with a light blue ... \n", - "\n", - " image \n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "\n", - "[2 rows x 2 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" @@ -1197,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1206,104 +848,7 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. 
Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638842 0.01666344 0.00451782 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2...
1[ 0.00973689 0.02148374 0.00244311 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2...
\n", - "

2 rows × 5 columns

\n", - "
[2 rows x 5 columns in total]" - ], - "text/plain": [ - " ml_generate_embedding_result \\\n", - "0 [ 0.00638842 0.01666344 0.00451782 ... -0.02... \n", - "1 [ 0.00973689 0.02148374 0.00244311 ... 0.00... \n", - "\n", - " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "\n", - " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "\n", - " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2... \n", - "\n", - "[2 rows x 5 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Generate embeddings.\n", "embed_model = llm.MultimodalEmbeddingGenerator()\n", @@ -1443,7 +988,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1455,18 +1000,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1498,22 +1032,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
0    {'status': '', 'content': 'Now, as all books, ...
" - ], - "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct[pyarrow]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1632,7 +1151,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.13.0" } }, "nbformat": 4, From f525c17d386605d3fbf08eda81b6a072bdfc4217 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 13 Feb 2026 19:58:07 +0000 Subject: [PATCH 2/8] docs: run notebook --- .../multimodal/multimodal_dataframe.ipynb | 900 +++++++++--------- 1 file changed, 434 insertions(+), 466 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 288fdc865c..0b72720a92 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -226,8 +226,13 @@ }, "outputs": [], "source": [ - "### 3. Conduct image transformations\n", - "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." 
+ "# Create blob columns from wildcard path.\n", + "df_image = bpd.from_glob_path(\n", + " \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n", + ")\n", + "\n", + "# From an existing object table\n", + "# df = bpd.read_gbq_object_table(\"\", name=\"blob_col\")" ] }, { @@ -281,36 +286,36 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " \n", " \n", "\n", - "

5 rows \u00d7 1 columns

\n", + "

5 rows × 1 columns

\n", "[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", "\n", "[5 rows x 1 columns]" ] @@ -320,6 +325,248 @@ "output_type": "execute_result" } ], + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Multimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "df_image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b6RRZb3qPi_T" + }, + "source": [ + "### 2. Combine unstructured data with structured data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4YJCdmLtR-qu" + }, + "source": [ + "Now you can put more information into the table to describe the files, such as author info from inputs or other metadata from the GCS object itself." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "YYYVn7NDH0Me" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" + ], + "text/plain": [ + " image author content_type \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", + "\n", + " size updated \n", + "0 1591240 2025-03-20 17:45:04+00:00 \n", + "1 1182951 2025-03-20 17:45:02+00:00 \n", + "2 1520884 2025-03-20 17:44:55+00:00 \n", + "3 1235401 2025-03-20 17:45:19+00:00 \n", + "4 1591923 2025-03-20 17:44:47+00:00 \n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Combine unstructured data with structured data\n", + "df_image = df_image.head(5)\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "df_image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NUd4Kog_QLRS" + }, + "source": [ + "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 75 + }, + "id": "UGuAk9PNDRF3", + "outputId": "73feb33d-4a05-48fb-96e5-3c48c2a456f3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# filter images and display, you can also display audio and video types\n", + "display_blob(df_image[df_image[\"author\"] == \"alice\"][\"image\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Conduct image transformations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -422,7 +669,10 @@ "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", " import os\n", " dst_folder = os.path.join(dst_folder, \"\")\n", - " dst_uri = series.blob.uri().str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", + " # Fetch metadata to get the URI\n", + " metadata = bbq.obj.fetch_metadata(series)\n", + " current_uri = metadata.struct.field(\"uri\")\n", + " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", " dst_blob = 
dst_uri.str.to_blob(connection=FULL_CONNECTION_ID)\n", " df_transform = bpd.DataFrame({\n", " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", @@ -459,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -468,12 +718,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] }, { @@ -497,78 +743,47 @@ " \n", " \n", " \n", - " image\n", - " author\n", - " content_type\n", - " size\n", - " updated\n", + " blurred_verbose\n", " \n", " \n", " \n", " \n", " 0\n", - " \n", - " alice\n", - " image/png\n", - " 1591240\n", - " 2025-03-20 17:45:04+00:00\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 1\n", - " \n", - " bob\n", - " image/png\n", - " 1182951\n", - " 2025-03-20 17:45:02+00:00\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 2\n", - " \n", - " bob\n", - " image/png\n", - " 1520884\n", - " 2025-03-20 17:44:55+00:00\n", + " {\"status\": \"\", \"content\": 
\"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 3\n", - " \n", - " alice\n", - " image/png\n", - " 1235401\n", - " 2025-03-20 17:45:19+00:00\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 4\n", - " \n", - " bob\n", - " image/png\n", - " 1591923\n", - " 2025-03-20 17:44:47+00:00\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", "\n", - "

5 rows \u00d7 5 columns

\n", - "[5 rows x 5 columns in total]" + "

5 rows × 1 columns

\n", + "[5 rows x 1 columns in total]" ], "text/plain": [ - " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-13T01:5... bob image/png \n", - "\n", - " size updated \n", - "0 1591240 2025-03-20 17:45:04+00:00 \n", - "1 1182951 2025-03-20 17:45:02+00:00 \n", - "2 1520884 2025-03-20 17:44:55+00:00 \n", - "3 1235401 2025-03-20 17:45:19+00:00 \n", - "4 1591923 2025-03-20 17:44:47+00:00 \n", + " blurred_verbose\n", + "0 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "1 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "2 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "3 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "4 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", "\n", - "[5 rows x 5 columns]" + "[5 rows x 1 columns]" ] }, - "execution_count": 7, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -589,7 +804,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -602,7 +817,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -614,7 +829,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -641,7 +856,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -654,46 +869,24 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as 
pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n" + 
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] } ], "source": [ - "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n", - " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n", - ")\n", - "df_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n", - " (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n", - ")\n", - "df_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n", - " alpha=50.0,\n", - " beta=150.0,\n", - " norm_type=\"minmax\",\n", - " dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n", - " engine=\"opencv\",\n", - ")" + "df_image[\"blurred\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", image_blur, 20, 20)\n", + "df_image[\"resized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)\n", + "df_image[\"normalized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\", image_normalize, 50.0, 150.0, \"minmax\")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": { "colab": { "base_uri": 
"https://localhost:8080/" @@ -706,55 +899,43 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] } ], "source": [ "# You can also chain functions together\n", - "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", engine=\"opencv\")" + "df_image[\"blur_resized\"] = apply_transformation(df_image[\"blurred\"], f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Using `verbose` mode for detailed output\\n\n", - "\\n\n", - "All multimodal functions support a `verbose` parameter, which defaults to `False`.\\n\n", + "### Using `verbose` mode for detailed output\n", + "\n", + "All multimodal functions support a `verbose` parameter, which defaults to `False`.\n", "\\n\n", "* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the 
extracted text).\\n\n", - "* When `verbose=True`, the function returns a `STRUCT` containing two fields:\\n\n", - " * `content`: The main result of the operation.\\n\n", - " * `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\\n\n", - "\\n\n", + "* When `verbose=True`, the function returns a `STRUCT` containing two fields:\n", + " * `content`: The main result of the operation.\n", + " * `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n", + "\n", "Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function." 
] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] }, { @@ -784,55 +965,53 @@ " \n", " \n", " 0\n", - " {'status': '', 'content': {'uri': 'gs://bigfra...\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 1\n", - " {'status': '', 'content': {'uri': 'gs://bigfra...\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 2\n", - " {'status': '', 'content': {'uri': 'gs://bigfra...\n", + " {\"status\": \"\", 
\"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 3\n", - " {'status': '', 'content': {'uri': 'gs://bigfra...\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", " 4\n", - " {'status': '', 'content': {'uri': 'gs://bigfra...\n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", " \n", " \n", "\n", - "

5 rows \u00d7 1 columns

\n", + "

5 rows × 1 columns

\n", "[5 rows x 1 columns in total]" ], "text/plain": [ " blurred_verbose\n", - "0 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "1 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "2 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "3 {'status': '', 'content': {'uri': 'gs://bigfra...\n", - "4 {'status': '', 'content': {'uri': 'gs://bigfra...\n", + "0 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "1 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "2 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "3 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "4 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 10, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_image[\"blurred_verbose\"] = df_image[\"image\"].blob.image_blur(\n", - " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/\", engine=\"opencv\", verbose=True\n", - ")\n", + "df_image[\"blurred_verbose\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/\", image_blur, 20, 20, verbose=True)\n", "df_image[[\"blurred_verbose\"]]" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -846,26 +1025,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - 
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is 
deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -897,88 +1070,88 @@ " blurred\n", " resized\n", " normalized\n", - " blur_resized\n", " blurred_verbose\n", + " blur_resized\n", " \n", " \n", " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", " 2025-03-20 17:45:04+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=023abcc625f85fdc1ac5ebb937bea4795f0dd4c81407b5669495c882a80cc4e097315c70242d7c7bd270ab39a4a23a6bcb1f1abb055cdadb5fb92cbd243c68158f8c384274f9da5058e884d00aa2c37503d01b7da715f0287c899df4f74beb82be069de2d1062c79a19675491fa21df859132a93cae3d69056472671972da4147b3e62bdd2594707848237b8fc8dd9ecbf49e8f8e1900e1d045d5ae58103ead988f0e499cbfcc6301982682d490c3fe2856ffe82f96b4d6f1274174b6d1b4b6b70d4edf2625feb23d5923060d7a471adeebc46159124fc0a16618711b410c243dec2c24c024bdbceb3e16d3e61febf35181e6f616a8bb57052f48a554aa46b45\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Exp
ires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=6c057949db2bffffa248cd691152f21d9d7d013649cf306ad9a93e08eaf24721691fefc8ee08a300f19e488e3dabf784ce79322185c81c1bc7ee7a45228b36e335a06ce5d9aa10ee1c1d349d377d81fae6943110e4a8d66eb6a1c4a25fba1cff76cb698ed132aeeb26953be41d13d47bdec08dbe41c284cae5af05b18ea0376ab15d2d926b534743cb96a5c5025a4d5e1b4c097c518b19f3ddb00d5db1da2554b37b33c6793cd349458b30ccb4f97f2c9fb2415ecb03b331f7eeb822700c8c5231542b7ddb3ba8143862fb938ecbf389c35c4ae6e9c87f7091007be8559dfc8cd0f2c62fcd1e686121d244172148d2245971dea14aa49ab8d99bbca192a0f998\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png\\\"}}\"}\n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", " 2025-03-20 17:45:02+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": 
\"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=0ba9574610b9e22ddfcdecdc7fdcdba1a055d7438207449537e65e176f5192df72342b2f3b6a4c2af4d1f25bb0c56d5a50d3a7ef0428830542112127add8b49073aa45e8f3a90d9948efe8d36e89e3c204f711147bb232db4eb5ec74be15cdeea78846864ef10ae336133955e9ffc3b48898ee28c31796f03a86cd6ec33c6833429babda1533621cba844b0dba1584afec5a2328b113c5f503157d984bcb08f37c4dc80fdb850af116e26117041a2d5c509393b5de5be40828ba47513258499f6daa9be6d1ac46a8f86eceae170c10317042abd9441355f53ed1cf7b0ed6ef6942175f768271a5658cdb578d85e7a0d7c27a734fe822b8b9c2a573cd81d00008\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=a202ed2db5f9df354dded2df43c960540dbd13b2179aa360b8c957e14a9eaeb02343e9f3327018cbfe39604922147a55cd016366c6ec9b35eeda317986fb38f14f47643ce5fab9efcd2968300485ff9597b58ea1c6d583c3e8b8c9afe35c0eafc5f70c15133a850c0bd7277550711340275881c2f76ce00157acca3a2b37cef12ee4a0bb9e755f7f1ccce8143acb8b6c94b5ce032e8e0ec6776370250303f4a7cda13ca3ecae0dad91ae7d9e829060011bdb063f51ea9f49e743ccc99e2dd2a9706f8d492f4f319eb753e4b70cd24653e7bb878a43b93dcca378399a70524b3f751982ddb72867bdf00d54bb0792273367ba7fd9dae327d67296ae569961ac8c\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\
\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png\\\"}}\"}\n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", " 2025-03-20 17:44:55+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=a373a46b9148a831b260341ca1c616a71313d10f31ee0fbb1e782c4ea00581244dbac2db9d929bf48150320d21d6934a6e4393b07e4057b50066dfab66881f0185db8cc5598f7456f5c3b9fac176f755de22819bdfb093b9f44eeed862d84e67a0ed39f450bc19330fa9970a85df94107092b8ed698fc60a9c5ad351d3db547407441c6018d428ff370cfd0fb780017152e864dcc6379c1caf95bf3213bf91577af6f342bb8054002390a43d210a3ae8dc82291282b6817e2ed3bc2b13e2b96d2d036a3bea121a128ef49bdba5bed70c482b62c7ce9cd1abecc84308ec8309c7aca2b99315016e136042a8dd1f820a73951212663ee386177da4b040242350a4\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=651c5
db745f3342c1c56d5c1480e5c9939059214def05a41c61f83870953be8c4a18e9a46918612915368f5ea931a31872fb598235df2f948b2a9503323ecab43fc287e57355c0b1fd17383eada4776861ab67f4ba5d4f108d3c414c783daa9938826de724c50c4c635c2980fa699d3268524f88f4db5bff5110d0dc927406dfd51fb5c79e8616abbc03790f88b53bf2f0500880ce30d45c82c972c073d3f54bd1795b13f3bb364d071ef662e007a3c57aa7c5ceb83d54519f80273739102d7a3dd561834bae6ceccbe2630fb0e383c89268131c7532ecb9de8eab68fa20bfeec4f1e4209136bcf93cd1e202893070021c157e3066070e74a14f2b3fa2746b5a\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png\\\"}}\"}\n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", " 2025-03-20 17:45:19+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": 
\"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=17e58c0f0b096057a970ca342d4afd32ad2d0bd094e006979eb190a7b65a3542d513f98ea4699613dc9d6262b31ad98064bc02121d2c60cd720b762c42c4c4ddeaf4231b25f8d7cf1ecca8f562669fcac9ade88d3d634a46b85a4c37759f51f06a076d925af6fe5a2814934956581f62633f1f6c7f1604eab21a2b15841cef353f0c34ecb1d521571f4bbd25073671d4eaf607229cc519a52c72bce988d40a530c9bce972705ea6ba49193d2547c00b866874eaa6d3c2f422c34d06f4b726da6ddf5d1d0c8f4c93fab9349acad923ac2392d00ff2f53f7796feb7b58c7849f066f61ce7fe4e113eb15c8d0e866498a9f2f6c99a436af5a9553b0fcd747210eb6\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=1e46e8a5164b8959e89bd2641a74b82a152b2d9e8a46ce90569e356edb9897984ce3d7cf531572fbe453ae2b649f633c6e99dd16f97e757a9fbed0fa4d49fcd0051f213d5927070a98e77fc232356298da3e62e446bf58331c0cf4d72267452e4ddbf5a40e39635a516b064e028b8db339fee32b2803015dd97763ca23ca0f9d3fed8efd93f41ce5ff96eed74f700e920f5ffc44dff164f410ad6c1d2857f59d0841bbc7b2ecabb59acd95257ca645002dd8a12bbaa6bb09668bcfc6d1edafb76a18e3d275e57a9a4f1b41bf75edc54eaff26f99abcdf0b8d81fca1ce66881900de7ee680cae13dcddaa41431abada65cfc9fec40e19886791c9698c0bd124c9\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\"
,\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png\\\"}}\"}\n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", " 2025-03-20 17:44:47+00:00\n", - " \n", - " \n", - " \n", - " \n", - " {'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}\n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=ac600eca17a4cddbd853b269ccf744df32b2afd6e611e031e5efe4cb338c29acc9961c40428483a0da04c8835f04756b123349172e9bc926ce17723a21101c0ef57c287f0c385ef3272f88cad1511d7a500340ca0d135bedd010d26508d890a400cce0af1f3f838cd4801cb7c2e1a3cb96a6287959b09ae85bc796c6210a6b0a0dd499a206e44ad21a5d2e2bf13d193e658e9c83f7c9ee8d58b9b2ad2238f8f473447b7f4f55a0f7f870dea44b18f93e7ccc6391d3b4987cbe28b7520cf5e16bd895923690fb229c23bc2493320efbb04daab6551cb7db0911a99e9e1009782a5d658296ec35fb8d4833a73a8738b6316d670dca39cdee36bd13b9a0f8d5f75a\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=26d4fbdb3c3d6e61c17145ca8b14643a79ce60
4bf3d13c9dbe6e75a5873c7f5556bed0dfceb38f862488c30b8c18e41d4db358105a92882f30454a9d3728a432166156e71a3df9e75682670fe8281a6d5d91915ddf54ec7c3a3b33f81524d2121d2320da40eb1bfddc92e934a3fc10b637bff27b9a5ddd8fa513accd602395c34f1211c3b3c9cda7b0958db5995a8871e8efa4c62ac173b629ab3f8c286750842e5faff135fd4c5698ed4c8a26835e1e7fb0b8f24231d7372f4d6cee0ee3063e2f1daf050a2108a0ba44ae3fd83e61c2a0c3fe048029f25c949f64bea854312a95468d7087fbf8fd739640eea7eaba5cc3fa1fcaae1336891ebcf7304704d1a0\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png\\\"}}\"}\n", + " \n", " \n", " \n", "\n", - "

5 rows \u00d7 10 columns

\n", + "

5 rows × 10 columns

\n", "[5 rows x 10 columns in total]" ], "text/plain": [ " image author content_type \\\n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", "\n", " size updated \\\n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -988,44 +1161,44 @@ "4 1591923 2025-03-20 17:44:47+00:00 \n", "\n", " blurred \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", "\n", " resized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_resize... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_resize... 
\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", "\n", " normalized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_normal... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", "\n", - " blur_resized \\\n", - "0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", - "4 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + " blurred_verbose \\\n", + "0 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", + "1 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", + "2 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", + "3 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", + "4 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", "\n", - " blurred_verbose \n", - "0 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "1 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "2 {'status': '', 'content': {'uri': 'gs://bigfra... \n", - "3 {'status': '', 'content': {'uri': 'gs://bigfra... 
\n", - "4 {'status': '', 'content': {'uri': 'gs://bigfra... \n", + " blur_resized \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", "\n", "[5 rows x 10 columns]" ] }, - "execution_count": 11, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1045,7 +1218,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "metadata": { "id": "mRUGfcaFVW-3" }, @@ -1054,7 +1227,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", "default model will be removed in BigFrames 3.0. 
Please supply an\n", "explicit model to avoid this message.\n", " return method(*args, **kwargs)\n" @@ -1068,7 +1241,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1082,22 +1255,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -1128,32 +1297,32 @@ " \n", " \n", " 0\n", - " The item is a tin of K9 Guard dog paw balm.\n", - " \n", + " The image shows a tin of K9Guard Dog Paw Balm.\n", + " \n", " \n", " \n", " 1\n", " The item is K9 Guard Dog Hot Spot Spray.\n", - " \n", + " \n", " \n", " \n", "\n", - "

2 rows \u00d7 2 columns

\n", + "

2 rows × 2 columns

\n", "[2 rows x 2 columns in total]" ], "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9 Guard dog paw balm. \n", - "1 The item is K9 Guard Dog Hot Spot Spray. \n", + " ml_generate_text_llm_result \\\n", + "0 The image shows a tin of K9Guard Dog Paw Balm. \n", + "1 The item is K9 Guard Dog Hot Spot Spray. \n", "\n", " image \n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", "\n", "[2 rows x 2 columns]" ] }, - "execution_count": 13, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1167,22 +1336,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 18, "metadata": { "id": "IG3J3HsKhyBY" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" @@ -1190,7 +1348,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1204,80 +1362,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of 
using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a tin of K9Guard Dog Paw Balm.
1The bottle is mostly white, with a light blue accents. The background is a light gray. There are also black and green elements on the bottle's label.
\n", - "

2 rows \u00d7 2 columns

\n", - "
[2 rows x 2 columns in total]" - ], - "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9Guard Dog Paw Balm. \n", - "1 The bottle is mostly white, with a light blue ... \n", - "\n", - " image \n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", - "\n", - "[2 rows x 2 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -1287,7 +1378,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1296,104 +1387,7 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. 
Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638842 0.01666344 0.00451782 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2...
1[ 0.00973689 0.02148374 0.00244311 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2...
\n", - "

2 rows \u00d7 5 columns

\n", - "
[2 rows x 5 columns in total]" - ], - "text/plain": [ - " ml_generate_embedding_result \\\n", - "0 [ 0.00638842 0.01666344 0.00451782 ... -0.02... \n", - "1 [ 0.00973689 0.02148374 0.00244311 ... 0.00... \n", - "\n", - " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "\n", - " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "\n", - " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2025-10-25T00:2... \n", - "\n", - "[2 rows x 5 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Generate embeddings.\n", "embed_model = llm.MultimodalEmbeddingGenerator()\n", @@ -1533,7 +1527,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1545,18 +1539,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1588,22 +1571,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
0    {'status': '', 'content': 'Now, as all books, ...
" - ], - "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct[pyarrow]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1722,7 +1690,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.13.0" } }, "nbformat": 4, From 0e7eb696c0af38fbc6e8944c1eac120c0b34f65e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 13 Feb 2026 21:13:47 +0000 Subject: [PATCH 3/8] docs: rerun notebook --- .../multimodal/multimodal_dataframe.ipynb | 678 ++++++++++++++---- 1 file changed, 539 insertions(+), 139 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 0b72720a92..4449512d36 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 16, "metadata": { 
"colab": { "base_uri": "https://localhost:8080/", @@ -286,23 +286,23 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " \n", " \n", "\n", @@ -311,16 +311,16 @@ ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 6, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 17, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -399,7 +399,7 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", @@ -407,7 +407,7 @@ " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", @@ -415,7 +415,7 @@ " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", @@ -423,7 +423,7 @@ " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", @@ -431,7 +431,7 @@ " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", @@ -444,11 +444,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... 
bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", "\n", " size updated \n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -460,7 +460,7 @@ "[5 rows x 5 columns]" ] }, - "execution_count": 7, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -486,7 +486,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -499,7 +499,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -511,7 +511,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -542,7 +542,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -694,8 +694,7 @@ "id": "b6RRZb3qPi_T" }, "source": [ - "# You can also chain functions together\n", - "df_image[\"blur_resized\"] = apply_transformation(df_image[\"blurred\"], f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)" + "### You can also chain functions together" ] }, { @@ -709,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 20, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -783,7 +782,7 @@ "[5 rows x 1 columns]" ] }, - "execution_count": 10, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -804,7 +803,7 @@ }, { "cell_type": 
"code", - "execution_count": 11, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -817,7 +816,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -829,7 +828,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -856,7 +855,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -886,7 +885,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -916,8 +915,8 @@ "### Using `verbose` mode for detailed output\n", "\n", "All multimodal functions support a `verbose` parameter, which defaults to `False`.\n", - "\\n\n", - "* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\\n\n", + "\n", + "* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\n", "* When `verbose=True`, the function returns a `STRUCT` containing two fields:\n", " * `content`: The main result of the operation.\n", " * `status`: An informational field. If the operation is successful, this will be empty. 
If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n", @@ -927,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -999,7 +998,7 @@ "[5 rows x 1 columns]" ] }, - "execution_count": 14, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1011,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1077,68 +1076,68 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", " 2025-03-20 17:45:04+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=023abcc625f85fdc1ac5ebb937bea4795f0dd4c81407b5669495c882a80cc4e097315c70242d7c7bd270ab39a4a23a6bcb1f1abb055cdadb5fb92cbd243c68158f8c384274f9da5058e884d00aa2c37503d01b7da715f0287c899df4f74beb82be069de2d1062c79a19675491fa21df859132a93cae3d69056472671972da4147b3e62bdd2594707848237b8fc8dd9ecbf49e8f8e1900e1d045d5ae58103ead988f0e499cbfcc6301982682d490c3fe2856ffe82f96b4d6f1274174b6d1b4b6b70d4edf2625feb23d5923060d7a471adeebc46159124fc0a16618711b410c243dec2c24c024bdbceb3e16d3e61febf35181e6f616a8bb57052f48a554aa46b45\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-b
igquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=6c057949db2bffffa248cd691152f21d9d7d013649cf306ad9a93e08eaf24721691fefc8ee08a300f19e488e3dabf784ce79322185c81c1bc7ee7a45228b36e335a06ce5d9aa10ee1c1d349d377d81fae6943110e4a8d66eb6a1c4a25fba1cff76cb698ed132aeeb26953be41d13d47bdec08dbe41c284cae5af05b18ea0376ab15d2d926b534743cb96a5c5025a4d5e1b4c097c518b19f3ddb00d5db1da2554b37b33c6793cd349458b30ccb4f97f2c9fb2415ecb03b331f7eeb822700c8c5231542b7ddb3ba8143862fb938ecbf389c35c4ae6e9c87f7091007be8559dfc8cd0f2c62fcd1e686121d244172148d2245971dea14aa49ab8d99bbca192a0f998\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=8d2b714a76ac46c5af4a505f0867b0fcc3bd56b5190382ca42565dbd00e0a6e8aca32d8ac1db6003b847b92af4fcf6f2d2bf1175b3f5e804ed1c5505a095e4698462782a0695009c377d3c1d8ca00855d96bdeffc1b26c4d0cc4c3a6b58e343cb83c8eb0fcb206288be9b97b4759f2c40947f59f7d4816ad344192b18be3ce1a03e3cffed83d39f66b3572e542e7886ca46e5e8fd67b4ad2072ff3ab05cbc66bbb619109011ca70323237e3c97178d56c416bbf8c544b8beebfb53c5a3d9cf1513efd6b6edad603e41f21cb8375d25cddbc524a39213cf9d0da518292b6cbddce6a31468f554a6aa20e40752fb00b50b49fdabe77fa474e8fcb8e0b97280736e\\\",\\\"write_url\\\":\\\"https://storage.googleapis
.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=60dfe615382ea90d6af20427f885980794fe943f09b15457f1f26ed353f59b4abfcd0805578e9ba6c0b1688d1307c439b8294a3f74d2b0b43e89fc23d4b8802e06c713565394d6a962edbac9b874f309edde8c26c95c79d36d71f66d99dcae13d3da56044a4b9e995def8adfca7ce113e8c359cee784a3b99f582bace1271d4e0f9832fa2935d6cc847c7ffe99244257b3aa0920264027d8089566406c99792c19f933bb55db5b8f46d7755fc6aa868a2e9441b44c52839d99dd2ccc826a06c5b5d0b8575c4e836d2036e58d68dcaf41a7aefd9c3fbaa9b57bd6d6f20de38cbec71e66ec8f77c8470c69a8c001f36dd0a47717db408cc3ea7b747bec084a4d1f\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png\\\"}}\"}\n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", " 2025-03-20 17:45:02+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": 
\"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=0ba9574610b9e22ddfcdecdc7fdcdba1a055d7438207449537e65e176f5192df72342b2f3b6a4c2af4d1f25bb0c56d5a50d3a7ef0428830542112127add8b49073aa45e8f3a90d9948efe8d36e89e3c204f711147bb232db4eb5ec74be15cdeea78846864ef10ae336133955e9ffc3b48898ee28c31796f03a86cd6ec33c6833429babda1533621cba844b0dba1584afec5a2328b113c5f503157d984bcb08f37c4dc80fdb850af116e26117041a2d5c509393b5de5be40828ba47513258499f6daa9be6d1ac46a8f86eceae170c10317042abd9441355f53ed1cf7b0ed6ef6942175f768271a5658cdb578d85e7a0d7c27a734fe822b8b9c2a573cd81d00008\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=a202ed2db5f9df354dded2df43c960540dbd13b2179aa360b8c957e14a9eaeb02343e9f3327018cbfe39604922147a55cd016366c6ec9b35eeda317986fb38f14f47643ce5fab9efcd2968300485ff9597b58ea1c6d583c3e8b8c9afe35c0eafc5f70c15133a850c0bd7277550711340275881c2f76ce00157acca3a2b37cef12ee4a0bb9e755f7f1ccce8143acb8b6c94b5ce032e8e0ec6776370250303f4a7cda13ca3ecae0dad91ae7d9e829060011bdb063f51ea9f49e743ccc99e2dd2a9706f8d492f4f319eb753e4b70cd24653e7bb878a43b93dcca378399a70524b3f751982ddb72867bdf00d54bb0792273367ba7fd9dae327d67296ae569961ac8c\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\
\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=0eb022977eec931fe33382899ed9f31ea6bc3b49b772a7e9f79a76e0a22c16267591fb28589a42dfe297361730acbd5391c2b1a28c07584e2be7f2e911eeae824f43dd85930a6f7f4ca07a0dba27af4be7750b000a4a2a961409d9cf6d9ab694cb2f5ef9b5747cbd569bb0689e384d3064636bc8a1c68a7a6a20aa5511d4fd51f496be35257b801c841f797a4a295c35827af35c6d2329676273248f101a02bb3a329561bcd5a8be6655f93b034a6338f3828e117b14e7786068887272ba42ff5f24f87c5f8b4a819a810af1d5c67d0e3c50fe0b26715603e74edadf5fc571d92f4a251758921cb3c5136286d621087eb2ceafb7a4a97eea6ae2670031cccebb\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=3b15d6570ebfd1dfbe8a6b0025a7eb80c9d7f6747a27c44586099f291d98ea40dcea90d6fd282fec0a9ce3e609372918e51b10c93515920642bc629129779e9dbd8dceef8cc72dac96e9da344cf44e0f5f1de4f6ba8a4e4c8fa4851df9203dd5904068d7b519b4740638ceade4f0bd70ac2f0f33cc7cd7770ea6363142c3d68c8ac59e845919f2f053d2818218416bff2edabf37d60f7954f6d55520a979700cbf61c186f21a607af48fbdeefb38953711d919efd3e4d5caee5fad36dbe798469f3c6bdd4d806cd7730ea448eee78fe94d34655
28e2fe4bdc91369fd6de38f8c1d16bace9c4c898a91fc8c342fe9eb6c09b2f35aaed36910cedbda75605ea1b1\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png\\\"}}\"}\n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", " 2025-03-20 17:44:55+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=a373a46b9148a831b260341ca1c616a71313d10f31ee0fbb1e782c4ea00581244dbac2db9d929bf48150320d21d6934a6e4393b07e4057b50066dfab66881f0185db8cc5598f7456f5c3b9fac176f755de22819bdfb093b9f44eeed862d84e67a0ed39f450bc19330fa9970a85df94107092b8ed698fc60a9c5ad351d3db547407441c6018d428ff370cfd0fb780017152e864dcc6379c1caf95bf3213bf91577af6f342bb8054002390a43d210a3ae8dc82291282b6817e2ed3bc2b13e2b96d2d036a3bea121a128ef49bdba5bed70c482b62c7ce9cd1abecc84308ec8309c7aca2b99315016e136042a8dd1f820a73951212663ee386177da4b040242350a4\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=651c5db745f3342c1c56d5c1480e5c9939059214def05a41c61f83870953be8c4a18e9a46918612915368f5ea931a3187
2fb598235df2f948b2a9503323ecab43fc287e57355c0b1fd17383eada4776861ab67f4ba5d4f108d3c414c783daa9938826de724c50c4c635c2980fa699d3268524f88f4db5bff5110d0dc927406dfd51fb5c79e8616abbc03790f88b53bf2f0500880ce30d45c82c972c073d3f54bd1795b13f3bb364d071ef662e007a3c57aa7c5ceb83d54519f80273739102d7a3dd561834bae6ceccbe2630fb0e383c89268131c7532ecb9de8eab68fa20bfeec4f1e4209136bcf93cd1e202893070021c157e3066070e74a14f2b3fa2746b5a\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=19c6bdad06410bb165f07c62722ea7bc2aa417422c84b588bd63a24074bc8cfa9a0d01c7ed44203ca5ba4a368cacd7425f3525d0bb6db02eaada53711ad0f065865d2ce3c0e93d12218e887f6449b510e0b8fecf6c2ba27968b6b3c79d7453cfc514306bd9e07e745892d630a2b33a66be8eafaffea22a7fc79abd1526ca24efde5bfa1c467da87c8f8b86174cb37e8792e546659242b01ea82972bb5469cc5941862e4a222cdd57ae62d928eacdb20301ad049f44b6f97b35e0275a90b84af2a4e320a411908b16da0e1de7be894dbc1c88d3c828fd3924c359b61bfcdfc527a2e6338c44e46df68258ea714a84f800b3554d691c8e1f00965b6db8a130d39d\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260
213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=29eb2181bb94a9a2d33f02f8c1a00d98a399cfeb6fc344e47188e441fb5d5866f3ff67d79219f0bc8649918e08eb55cf9018aed588255428f25712570908ab4d7bdd5c855c5d9d472d749154cb6a1535d466ddf134f9cf373816fb511c77f19cc19421b8d20a19c3cfc79f197e09b0d9b9dcc8bdb02e823256f142a9b95a6b9425ed1b7696dc282ae814fb594e45e62c493334c4e3628836728fe396f33cc092fccd762d71e560223f488b76874cf8a21ee3dbaf2fe23faaab4062e3f42086bc9c46845e913bd8becd63e3877b1cfd2a806f85c082add4f34532bf91623b8eed95e902721516df4ba8fe715581ccdfbb0c69dc56652beee1434d08126064fae2\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png\\\"}}\"}\n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", " 2025-03-20 17:45:19+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": 
\"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=17e58c0f0b096057a970ca342d4afd32ad2d0bd094e006979eb190a7b65a3542d513f98ea4699613dc9d6262b31ad98064bc02121d2c60cd720b762c42c4c4ddeaf4231b25f8d7cf1ecca8f562669fcac9ade88d3d634a46b85a4c37759f51f06a076d925af6fe5a2814934956581f62633f1f6c7f1604eab21a2b15841cef353f0c34ecb1d521571f4bbd25073671d4eaf607229cc519a52c72bce988d40a530c9bce972705ea6ba49193d2547c00b866874eaa6d3c2f422c34d06f4b726da6ddf5d1d0c8f4c93fab9349acad923ac2392d00ff2f53f7796feb7b58c7849f066f61ce7fe4e113eb15c8d0e866498a9f2f6c99a436af5a9553b0fcd747210eb6\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=1e46e8a5164b8959e89bd2641a74b82a152b2d9e8a46ce90569e356edb9897984ce3d7cf531572fbe453ae2b649f633c6e99dd16f97e757a9fbed0fa4d49fcd0051f213d5927070a98e77fc232356298da3e62e446bf58331c0cf4d72267452e4ddbf5a40e39635a516b064e028b8db339fee32b2803015dd97763ca23ca0f9d3fed8efd93f41ce5ff96eed74f700e920f5ffc44dff164f410ad6c1d2857f59d0841bbc7b2ecabb59acd95257ca645002dd8a12bbaa6bb09668bcfc6d1edafb76a18e3d275e57a9a4f1b41bf75edc54eaff26f99abcdf0b8d81fca1ce66881900de7ee680cae13dcddaa41431abada65cfc9fec40e19886791c9698c0bd124c9\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\"
,\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=6d400c9b92869c7a979ff223125518ec9f7fb37197e25b7bfbc0e0376830ba344a1f5b3a9f105d45faf9138b2e67e18f778a144a6cf4bb832d73efdd49003c95c50123c4de9bde28c3b70695bfbfb655d16c2aa92a6fad861d071af509202b82b71e99e0aa992367fb266bda24074acbcfc7201b087d4fb62b9c42d47482406ad9b87dda7ca803274fceca26d7e20e899ba6b6c4a07f0af67ed5b12f7af72d311777dd99e276bda3e3b30e3d723e70d0bd4d6a53abdb02f1108319d6270ee9e5fe30c3782f31f33c6fafce71fb533609358ad970e7b6d35b8486700ba3e7d7f7bfb9fa2f13d5fe0edd83a8201837288d919c3da7ff967e0a1340233de4ee4bf2\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=4f6c06e462a7faa5f1f3a341ba115d84cdb8a674f58d9678f3c6b1e69c668259d144dbe59b6a73739758a0080fe7931e13026f0346df9eecb3f4f808deb199e7b2edcb7c6d3933e591b9689cad95fe85a02aabc98a59c80fcd695fc655f9501840eebbcf1ebf03433dc9370985599c14a65f8f9b763f14571977688d5da6e56b6979cec3200499dbd8a81fe58786558257e93dfc159a866878468a36cbb8abecce2c4845ab930862f5cd74dce148fd5103279361fc0343735cda980311222c0710236002648d6b6b735d68e2fded9e1c96
501a8f080a2c4e913183e752b61912ea1ab3f09586ef1e2564744a5f5d477526b7eb49702c173752cd15c91b0a283d\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png\\\"}}\"}\n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", " 2025-03-20 17:44:47+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T01:54:36Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=ac600eca17a4cddbd853b269ccf744df32b2afd6e611e031e5efe4cb338c29acc9961c40428483a0da04c8835f04756b123349172e9bc926ce17723a21101c0ef57c287f0c385ef3272f88cad1511d7a500340ca0d135bedd010d26508d890a400cce0af1f3f838cd4801cb7c2e1a3cb96a6287959b09ae85bc796c6210a6b0a0dd499a206e44ad21a5d2e2bf13d193e658e9c83f7c9ee8d58b9b2ad2238f8f473447b7f4f55a0f7f870dea44b18f93e7ccc6391d3b4987cbe28b7520cf5e16bd895923690fb229c23bc2493320efbb04daab6551cb7db0911a99e9e1009782a5d658296ec35fb8d4833a73a8738b6316d670dca39cdee36bd13b9a0f8d5f75a\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T195436Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=26d4fbdb3c3d6e61c17145ca8b14643a79ce604bf3d13c9dbe6e75a5873c7f5556bed0dfceb38f862488c30b8c18e41d4db358105a92882f304
54a9d3728a432166156e71a3df9e75682670fe8281a6d5d91915ddf54ec7c3a3b33f81524d2121d2320da40eb1bfddc92e934a3fc10b637bff27b9a5ddd8fa513accd602395c34f1211c3b3c9cda7b0958db5995a8871e8efa4c62ac173b629ab3f8c286750842e5faff135fd4c5698ed4c8a26835e1e7fb0b8f24231d7372f4d6cee0ee3063e2f1daf050a2108a0ba44ae3fd83e61c2a0c3fe048029f25c949f64bea854312a95468d7087fbf8fd739640eea7eaba5cc3fa1fcaae1336891ebcf7304704d1a0\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=ae50ee928cb422e14117cd4e6743c5501ec897036ee52e285ef18a65cb083f50c34fb7f7b859df5ef9cc05a522726cb89a86270a7b48a02082ec4de96fa351340b4b07bd5cd2b097aa2d86a9272ddb0ab61725041e7ddfeb5c3851df473ab3f9dbbcaa0956847f7da76ee2706b5b999e02c7d4058d66566573cbc9e15e6bf8c7ff92ef7470ad4730ab3214d33447a2fa29ac3f12909b9e68f77ab9e879b24b320d16ec7ebddd29cf482f79bfc62d06ea623868f6b79fc40b912efa78d81f1a8c3b368ca7f3b30881bc269e9a7ccc91ad312a41ff378c808e5ae9016711333708d1f87b090dce503ba37da918b0b0a776554375a9cf5be61a934c3f186e24da10\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260
213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=0046282ccd9e822e87f190d24935b0adab7f38ed6ecfd4c4c0b0c6c9049267684796306f0058e3d8aaf91c166500c25bed719d5953989262050faec7b4dd1f9d3e1c6a71ecdcf45a0e92e61cf962010a2268770a8e75d68849ba98742c874f7e49034fedb01fc1e5d8f37321b36f66f386b6d1de964de10977f6df5896b47451219b65c4f9a254855216402bacd2f88ebdc473dd95502d51f1b8b2b198ea1b1bb845bcd0eefa498f5e6486457d10e4d636c864d7c184bb03a03f66407401e556897109caeda270b32792f2ddff82d8ce3841b77ad993209803f2abaae7e8f78a3c4cfa9eca0ef9d7b920b79419572bb1c794759ef87b8b90b718f6454b8c4f74\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png\\\"}}\"}\n", + " \n", " \n", " \n", "\n", @@ -1147,11 +1146,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", "\n", " size updated \\\n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -1161,25 +1160,25 @@ "4 1591923 2025-03-20 17:44:47+00:00 \n", "\n", " blurred \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... 
\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", "\n", " resized \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", "\n", " normalized \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", "\n", " blurred_verbose \\\n", "0 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... 
\n", @@ -1189,16 +1188,16 @@ "4 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", "\n", " blur_resized \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", "\n", "[5 rows x 10 columns]" ] }, - "execution_count": 15, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1218,7 +1217,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 26, "metadata": { "id": "mRUGfcaFVW-3" }, @@ -1241,7 +1240,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1297,58 +1296,84 @@ " \n", " \n", " 0\n", - " The image shows a tin of K9Guard Dog Paw Balm.\n", - " \n", + " Based on the image, the item is a tin of dog paw balm. It's labeled \"K9Guard Dog Paw Balm.\"\n", + " \n", " \n", " \n", " 1\n", - " The item is K9 Guard Dog Hot Spot Spray.\n", - " \n", + " The item is a bottle of K9 Guard Dog Hot Spot Spray.\n", + " \n", + " \n", + " \n", + " 2\n", + " The item is rabbit food or treats from \"Fluffy Buns\". 
There are three varieties: \"Timoth Hay Lend Variety Blend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\".\\n\n", + " \n", + " \n", + " \n", + " 3\n", + " The item is a cat tree.\\n\n", + " \n", + " \n", + " \n", + " 4\n", + " The item is a bag of \"Chirpy Seed\" Deluxe Bird Food.\n", + " \n", " \n", " \n", "\n", - "

2 rows × 2 columns

\n", - "[2 rows x 2 columns in total]" + "

5 rows × 2 columns

\n", + "[5 rows x 2 columns in total]" ], "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The image shows a tin of K9Guard Dog Paw Balm. \n", - "1 The item is K9 Guard Dog Hot Spot Spray. \n", + " ml_generate_text_llm_result \\\n", + "0 Based on the image, the item is a tin of dog p... \n", + "1 The item is a bottle of K9 Guard Dog Hot Spot ... \n", + "2 The item is rabbit food or treats from \"Fluffy... \n", + "3 The item is a cat tree.\\n \n", + "4 The item is a bag of \"Chirpy Seed\" Deluxe Bird... \n", "\n", " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T01:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", "\n", - "[2 rows x 2 columns]" + "[5 rows x 2 columns]" ] }, - "execution_count": 17, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Ask the same question on the images\n", - "df_image = df_image.head(2)\n", "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 28, "metadata": { "id": "IG3J3HsKhyBY" }, "outputs": [], "source": [ "# Ask different questions\n", - "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" + "df_image[\"question\"] = [\n", + " \"what item is it?\",\n", + " \"what color is the picture?\",\n", + " \"what is the product name?\",\n", + " \"is it for pets?\",\n", + " \"what is the weight of the product?\",\n", + "]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "colab": { "base_uri": 
"https://localhost:8080/", @@ -1362,6 +1387,12 @@ "name": "stderr", "output_type": "stream", "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", @@ -1369,6 +1400,84 @@ "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", " return prop(*args, **kwargs)\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a tin of K9 Guard Dog Paw Balm.
1The picture has multiple colors, including white, light blue, black, and green. The background is a light gray.\\n
2Here are the three product names that are visible in the image:\\n\\n1. **Timothy Hay Blend Variety Blend**\\n2. **Herbal Greens Mix Variety Blend**\\n3. **Berry & Blossom Treat Blend**
3Yes, the item in the image is a cat tree, which is a type of furniture designed for pets, specifically cats.
4The net weight of the product is 15 oz or 257g.
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 The item is a tin of K9 Guard Dog Paw Balm. \n", + "1 The picture has multiple colors, including whi... \n", + "2 Here are the three product names that are visi... \n", + "3 Yes, the item in the image is a cat tree, whic... \n", + "4 The net weight of the product is 15 oz or 257g. \n", + "\n", + " image \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1378,7 +1487,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1387,7 +1496,138 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. 
Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...
1[ 0.00973689 0.02148374 0.00244311 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...
2[ 0.01197331 0.02138491 0.05967776 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...
3[-0.02621007 0.02797794 0.04416854 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...
4[ 0.05918613 0.01251376 0.01907326 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" + ], + "text/plain": [ + " ml_generate_embedding_result \\\n", + "0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n", + "1 [ 0.00973689 0.02148374 0.00244311 ... 0.00... \n", + "2 [ 0.01197331 0.02138491 0.05967776 ... -0.01... \n", + "3 [-0.02621007 0.02797794 0.04416854 ... -0.01... \n", + "4 [ 0.05918613 0.01251376 0.01907326 ... 0.01... \n", + "\n", + " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "\n", + " ml_generate_embedding_end_sec \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "\n", + " content \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Generate embeddings.\n", "embed_model = llm.MultimodalEmbeddingGenerator()\n", @@ -1408,9 +1648,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1489,9 +1738,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" + ], + "text/plain": [ + " extracted_text \\\n", + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder... \n", + "\n", + " chunked \n", + "0 [\"CritterCuisine Pro 5000 - Automatic Pet Feed... \n", + "\n", + "[1 rows x 2 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\n", "\n", @@ -1509,9 +1809,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+              "0    on a level, stable surface to prevent tipping....\n",
+              "0    included)\\nto maintain the schedule during pow...\n",
+              "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
+              "0    paperclip) for 5\\nseconds. This will reset all...\n",
+              "0    unit with a damp cloth. Do not immerse the bas...\n",
+              "0    continues,\\ncontact customer support.\\nE2: Foo...
" + ], + "text/plain": [ + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "0 on a level, stable surface to prevent tipping....\n", + "0 included)\\nto maintain the schedule during pow...\n", + "0 digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n", + "0 paperclip) for 5\\nseconds. This will reset all...\n", + "0 unit with a damp cloth. Do not immerse the bas...\n", + "0 continues,\\ncontact customer support.\\nE2: Foo...\n", + "Name: chunked, dtype: string" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Explode the chunks to see each chunk as a separate row\n", "chunked = df_pdf[\"chunked\"].explode()\n", @@ -1527,7 +1854,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -1537,9 +1864,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
0    Now, as all books, not primarily intended as p...
" + ], + "text/plain": [ + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1569,9 +1921,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
0    {'status': '', 'content': 'Now, as all books, ...
" + ], + "text/plain": [ + "0 {'status': '', 'content': 'Now, as all books, ...\n", + "Name: transcription_results, dtype: struct[pyarrow]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1604,9 +1971,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1646,9 +2022,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", + "change in future versions.\n", + " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + ], + "text/plain": [ + "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", + "Name: blob_col, dtype: extension>[pyarrow]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Create a Multimodal DataFrame from the sample image URIs\n", "exif_image_df = bpd.from_glob_path(\n", From 10570600dd7f38f1a7cf88cfaddab65625b4c288 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 18 Feb 2026 22:08:51 +0000 Subject: [PATCH 4/8] fix: fix broken link for image_blur --- .../multimodal/multimodal_dataframe.ipynb | 586 +++++++++++------- 1 file changed, 353 insertions(+), 233 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 4449512d36..aec56f05fb 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 11, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 42, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 44, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 45, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -286,23 +286,23 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " \n", " \n", " 4\n", - " 
\n", + " \n", " \n", " \n", "\n", @@ -311,16 +311,16 @@ ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 16, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 46, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -399,7 +399,7 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", @@ -407,7 +407,7 @@ " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", @@ -415,7 +415,7 @@ " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", @@ -423,7 +423,7 @@ " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", @@ -431,7 +431,7 @@ " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", @@ -444,11 +444,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... 
bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", "\n", " size updated \n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -460,7 +460,7 @@ "[5 rows x 5 columns]" ] }, - "execution_count": 17, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -486,7 +486,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 47, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -499,7 +499,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -511,7 +511,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -542,7 +542,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -579,30 +579,72 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool) -> str:\n", + "def image_blur(\n", + " src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool\n", + ") -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", " import requests\n", + " import base64\n", " from requests import adapters\n", - " try:\n", + "\n", + " try: \n", " session = requests.Session()\n", " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", - " src_obj, dst_obj = json.loads(src_rt), json.loads(dst_rt)\n", - " src_url, dst_url = src_obj[\"access_urls\"][\"read_url\"], dst_obj[\"access_urls\"][\"write_url\"]\n", + " src_obj = json.loads(src_rt)\n", + " src_url = 
src_obj[\"access_urls\"][\"read_url\"]\n", + " \n", " response = session.get(src_url, timeout=30)\n", " response.raise_for_status()\n", + " \n", " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " if img is None:\n", + " raise ValueError(\"cv.imdecode failed\")\n", + " \n", " kx, ky = int(kx), int(ky)\n", - " img_blurred = cv.blur(img, (kx, ky))\n", - " ext = ext.lower()\n", + " img_blurred = cv.blur(img, ksize=(kx, ky))\n", + " \n", + " ext = ext or \".jpeg\"\n", " success, encoded = cv.imencode(ext, img_blurred)\n", " if not success:\n", " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", - " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/\" + ext.replace(\".\", \"\")}, timeout=30).raise_for_status()\n", - " return json.dumps({\"status\": \"\", \"content\": dst_rt}) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", - " except Exception as e:\n", - " if verbose: return json.dumps({\"status\": str(e), \"content\": \"\"})\n", + " \n", + " # Handle two output modes\n", + " if dst_rt: # GCS/Series output mode\n", + " dst_obj = json.loads(dst_rt)\n", + " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", + " \n", + " ext_ct = ext.replace(\".\", \"\")\n", + " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", + " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", + " content_type = \"image/\" + ext_ct\n", + " \n", + " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": content_type}, timeout=30).raise_for_status()\n", + " \n", + " # Extract URI for OBJ.MAKE_REF compatibility \n", + " uri = dst_obj[\"objectref\"][\"uri\"]\n", + " \n", + " if verbose:\n", + " return json.dumps({\"status\": \"\", \"content\": uri})\n", + " else:\n", + " return uri # ReturN simple URI string, not full JSON\n", + " \n", + " else: # BigQuery bytes output mode \n", + " image_bytes = encoded.tobytes()\n", + " if verbose:\n", + " result = {\n", + " \"status\": \"\",\n", + " 
\"content\": base64.b64encode(image_bytes).decode(),\n", + " \"content_type\": f\"image/{ext_ct}\" if 'ext_ct' in locals() else \"image/jpeg\"\n", + " }\n", + " return json.dumps(result)\n", + " else: \n", + " return base64.b64encode(image_bytes).decode()\n", + " \n", + " except Exception as e: \n", + " if verbose: \n", + " return json.dumps({\"status\": str(e), \"content\": \"\"}) \n", " raise e\n", "\n", "@bpd.udf(\n", @@ -613,7 +655,10 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_resize(src_rt: str, dst_rt: str, ext: str, dx: int, dy: int, fx: float, fy: float, verbose: bool) -> str:\n", + "def image_resize(\n", + " src_rt: str, dst_rt: str, ext: str, dx: int, dy: int,\n", + " fx: float, fy: float, verbose: bool\n", + ") -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -627,12 +672,28 @@ " response = session.get(src_url, timeout=30)\n", " response.raise_for_status()\n", " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " if img is None:\n", + " raise ValueError(\"cv.imdecode failed\")\n", " img_resized = cv.resize(img, dsize=(dx, dy), fx=fx, fy=fy)\n", + " ext = ext or \".jpeg\"\n", " success, encoded = cv.imencode(ext, img_resized)\n", - " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/\" + ext.replace(\".\", \"\")}, timeout=30).raise_for_status()\n", - " return json.dumps({\"status\": \"\", \"content\": dst_rt}) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", + " if not success:\n", + " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", + " ext_ct = ext.replace(\".\", \"\").lower()\n", + " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", + " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", + " content_type = \"image/\" + ext_ct\n", + " session.put(\n", + " dst_url, data=encoded.tobytes(),\n", + " headers={\"Content-Type\": content_type},\n", + " 
timeout=30\n", + " ).raise_for_status()\n", + " return json.dumps(\n", + " {\"status\": \"\", \"content\": dst_rt}\n", + " ) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", " except Exception as e:\n", - " if verbose: return json.dumps({\"status\": str(e), \"content\": \"\"})\n", + " if verbose:\n", + " return json.dumps({\"status\": str(e), \"content\": \"\"})\n", " raise e\n", "\n", "@bpd.udf(\n", @@ -643,7 +704,10 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_normalize(src_rt: str, dst_rt: str, ext: str, alpha: float, beta: float, norm_type: str, verbose: bool) -> str:\n", + "def image_normalize(\n", + " src_rt: str, dst_rt: str, ext: str, alpha: float,\n", + " beta: float, norm_type: str, verbose: bool\n", + ") -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -657,13 +721,28 @@ " response = session.get(src_url, timeout=30)\n", " response.raise_for_status()\n", " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " if img is None:\n", + " raise ValueError(\"cv.imdecode failed\")\n", " norm_map = {\"inf\": cv.NORM_INF, \"l1\": cv.NORM_L1, \"l2\": cv.NORM_L2, \"minmax\": cv.NORM_MINMAX}\n", " img_normalized = cv.normalize(img, None, alpha=alpha, beta=beta, norm_type=norm_map[norm_type])\n", + " ext = ext or \".jpeg\"\n", " success, encoded = cv.imencode(ext, img_normalized)\n", - " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/\" + ext.replace(\".\", \"\")}, timeout=30).raise_for_status()\n", - " return json.dumps({\"status\": \"\", \"content\": dst_rt}) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", + " if not success:\n", + " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", + " ext_ct = ext.replace(\".\", \"\").lower()\n", + " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", + " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", + " content_type = 
\"image/\" + ext_ct\n", + " session.put(\n", + " dst_url, data=encoded.tobytes(),\n", + " headers={\"Content-Type\": content_type}, timeout=30\n", + " ).raise_for_status()\n", + " return json.dumps(\n", + " {\"status\": \"\", \"content\": dst_rt}\n", + " ) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", " except Exception as e:\n", - " if verbose: return json.dumps({\"status\": str(e), \"content\": \"\"})\n", + " if verbose:\n", + " return json.dumps({\"status\": str(e), \"content\": \"\"})\n", " raise e\n", "\n", "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", @@ -679,13 +758,24 @@ " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", " \"ext\": dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n", " })\n", - " res = df_transform.apply(udf, axis=1, args=(*args, verbose))\n", + " res = df_transform[[\"src_rt\", \"dst_rt\", \"ext\"]].apply(\n", + " udf, axis=1, args=(*args, verbose)\n", + " )\n", " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", "\n", "# Apply transformations\n", - "df_image[\"blurred\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", image_blur, 20, 20)\n", - "df_image[\"resized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)\n", - "df_image[\"normalized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\", image_normalize, 50.0, 150.0, \"minmax\")" + "df_image[\"blurred\"] = apply_transformation(\n", + " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_v2/\",\n", + " image_blur, 20, 20\n", + ")\n", + "df_image[\"resized\"] = apply_transformation(\n", + " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed_v2/\",\n", + " image_resize, 300, 200, 0.0, 0.0\n", + ")\n", + "df_image[\"normalized\"] = apply_transformation(\n", + " df_image[\"image\"], 
f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed_v2/\",\n", + " image_normalize, 50.0, 150.0, \"minmax\"\n", + ")" ] }, { @@ -708,7 +798,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 49, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -748,23 +838,23 @@ " \n", " \n", " 0\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 1\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 2\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 3\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 4\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", "\n", @@ -773,22 +863,26 @@ ], "text/plain": [ " blurred_verbose\n", - "0 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "1 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "2 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "3 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "4 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "0 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + "1 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + "2 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + "3 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + "4 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 20, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - 
"df_image[\"blurred_verbose\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/\", image_blur, 20, 20, verbose=True)\n", + "df_image[\"blurred_verbose\"] = apply_transformation(\n", + " df_image[\"image\"],\n", + " f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose_v2/\",\n", + " image_blur, 20, 20, verbose=True\n", + ")\n", "df_image[[\"blurred_verbose\"]]" ] }, @@ -803,7 +897,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 50, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -816,7 +910,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -828,7 +922,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -855,7 +949,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 51, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -878,14 +972,23 @@ } ], "source": [ - "df_image[\"blurred\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", image_blur, 20, 20)\n", - "df_image[\"resized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)\n", - "df_image[\"normalized\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\", image_normalize, 50.0, 150.0, \"minmax\")" + "df_image[\"blurred\"] = apply_transformation(\n", + " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n", + " image_blur, 20, 20\n", + ")\n", + "df_image[\"resized\"] = apply_transformation(\n", + " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\",\n", + " image_resize, 300, 200, 0.0, 0.0\n", + ")\n", + "df_image[\"normalized\"] = apply_transformation(\n", + " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n", + " image_normalize, 50.0, 150.0, \"minmax\"\n", + ")" ] }, { "cell_type": "code", - 
"execution_count": 23, + "execution_count": 52, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -905,7 +1008,11 @@ ], "source": [ "# You can also chain functions together\n", - "df_image[\"blur_resized\"] = apply_transformation(df_image[\"blurred\"], f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", image_resize, 300, 200, 0.0, 0.0)" + "df_image[\"blur_resized\"] = apply_transformation(\n", + " df_image[\"blurred\"],\n", + " f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed_v2/\",\n", + " image_resize, 300, 200, 0.0, 0.0\n", + ")" ] }, { @@ -926,7 +1033,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 53, "metadata": {}, "outputs": [ { @@ -964,23 +1071,23 @@ " \n", " \n", " 0\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 1\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 2\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 3\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", " 4\n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", " \n", " \n", "\n", @@ -989,28 +1096,41 @@ ], "text/plain": [ " blurred_verbose\n", - "0 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "1 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "2 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "3 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", - "4 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\...\n", + "0 {\"status\": \"\", \"content\": 
\"gs://bigframes_blob...\n", + "1 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + "2 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + "3 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + "4 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 24, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_image[\"blurred_verbose\"] = apply_transformation(df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/\", image_blur, 20, 20, verbose=True)\n", + "df_image[\"blurred_verbose\"] = apply_transformation(\n", + " df_image[\"image\"],\n", + " f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose_v2/\",\n", + " image_blur, 20, 20, verbose=True\n", + ")\n", "df_image[[\"blurred_verbose\"]]" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "df_image = df_image._cached()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1076,68 +1196,68 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", " 2025-03-20 17:45:04+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": 
\"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=8d2b714a76ac46c5af4a505f0867b0fcc3bd56b5190382ca42565dbd00e0a6e8aca32d8ac1db6003b847b92af4fcf6f2d2bf1175b3f5e804ed1c5505a095e4698462782a0695009c377d3c1d8ca00855d96bdeffc1b26c4d0cc4c3a6b58e343cb83c8eb0fcb206288be9b97b4759f2c40947f59f7d4816ad344192b18be3ce1a03e3cffed83d39f66b3572e542e7886ca46e5e8fd67b4ad2072ff3ab05cbc66bbb619109011ca70323237e3c97178d56c416bbf8c544b8beebfb53c5a3d9cf1513efd6b6edad603e41f21cb8375d25cddbc524a39213cf9d0da518292b6cbddce6a31468f554a6aa20e40752fb00b50b49fdabe77fa474e8fcb8e0b97280736e\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=60dfe615382ea90d6af20427f885980794fe943f09b15457f1f26ed353f59b4abfcd0805578e9ba6c0b1688d1307c439b8294a3f74d2b0b43e89fc23d4b8802e06c713565394d6a962edbac9b874f309edde8c26c95c79d36d71f66d99dcae13d3da56044a4b9e995def8adfca7ce113e8c359cee784a3b99f582bace1271d4e0f9832fa2935d6cc847c7ffe99244257b3aa0920264027d8089566406c99792c19f933bb55db5b8f46d7755fc6aa868a2e9441b44c52839d99dd2ccc826a06c5b5d0b8575c4e836d2036e58d68dcaf41a7aefd9c3fbaa9b57bd6d6f20de38cbec71e66ec8f77c8470c69a8c001f36dd0a47717db408cc3ea7b747bec084a4d1f\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\
\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/k9-guard-dog-paw-balm.png\"}\n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", " 2025-03-20 17:45:02+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=0eb022977eec931fe33382899ed9f31ea6bc3b49b772a7e9f79a76e0a22c16267591fb28589a42dfe297361730acbd5391c2b1a28c07584e2be7f2e911eeae824f43dd85930a6f7f4ca07a0dba27af4be7750b000a4a2a961409d9cf6d9ab694cb2f5ef9b5747cbd569bb0689e384d3064636bc8a1c68a7a6a20aa5511d4fd51f496be35257b801c841f797a4a295c35827af35c6d2329676273248f101a02bb3a329561bcd5a8be6655f93b034a6338f3828e117b14e7786068887272ba42ff5f24f87c5f8b4a819a810af1d5c67d0e3c50fe0b26715603e74edadf5fc571d92f4a251758921cb3c5136286d621087eb2ceafb7a4a97eea6ae2670031cccebb\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=3b15d6570ebfd1dfbe8a6b0025a7eb80c9d7f6747a27c44586099f291d98ea40dcea90d6fd282fec0a9ce3e609372918e51b10c93515920642bc629129779e9dbd8dceef8cc72dac96e9da344cf44e0f5f1
de4f6ba8a4e4c8fa4851df9203dd5904068d7b519b4740638ceade4f0bd70ac2f0f33cc7cd7770ea6363142c3d68c8ac59e845919f2f053d2818218416bff2edabf37d60f7954f6d55520a979700cbf61c186f21a607af48fbdeefb38953711d919efd3e4d5caee5fad36dbe798469f3c6bdd4d806cd7730ea448eee78fe94d3465528e2fe4bdc91369fd6de38f8c1d16bace9c4c898a91fc8c342fe9eb6c09b2f35aaed36910cedbda75605ea1b1\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/k9-guard-dog-hot-spot-spray.png\"}\n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", " 2025-03-20 17:44:55+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=19c6bdad06410bb165f07c62722ea7bc2aa417422c84b588bd63a24074bc8cfa9a0d01c7ed44203ca5ba4a368cacd7425f3525d0bb6db02eaada53711ad0f065865d2ce3c0e93d12218e887f6449b510e0b8fecf6c2ba27968b6b3c79d7453cfc514306bd9e07e745892d630a2b33a66be8eafaffea22a7fc79abd1526ca24efde5bfa1c467da87c8f8b86174cb37e8792e546659242b01ea82972bb5469cc5941862e4a222cdd57ae62d928eacdb20301ad049f44b6f97b35e0275a90b84af2a4e320a411908b16da0e1de7be894dbc1c88d3c828fd3924c359b61bfcdfc527a2e6338c44e46df68258ea714a84f800b3554d691c8e1f00965b6db8a130d39d\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image
_blur_transformed_verbose%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=29eb2181bb94a9a2d33f02f8c1a00d98a399cfeb6fc344e47188e441fb5d5866f3ff67d79219f0bc8649918e08eb55cf9018aed588255428f25712570908ab4d7bdd5c855c5d9d472d749154cb6a1535d466ddf134f9cf373816fb511c77f19cc19421b8d20a19c3cfc79f197e09b0d9b9dcc8bdb02e823256f142a9b95a6b9425ed1b7696dc282ae814fb594e45e62c493334c4e3628836728fe396f33cc092fccd762d71e560223f488b76874cf8a21ee3dbaf2fe23faaab4062e3f42086bc9c46845e913bd8becd63e3877b1cfd2a806f85c082add4f34532bf91623b8eed95e902721516df4ba8fe715581ccdfbb0c69dc56652beee1434d08126064fae2\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/fluffy-buns-chinchilla-food-variety-pack.png\"}\n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", " 2025-03-20 17:45:19+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": 
\"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=6d400c9b92869c7a979ff223125518ec9f7fb37197e25b7bfbc0e0376830ba344a1f5b3a9f105d45faf9138b2e67e18f778a144a6cf4bb832d73efdd49003c95c50123c4de9bde28c3b70695bfbfb655d16c2aa92a6fad861d071af509202b82b71e99e0aa992367fb266bda24074acbcfc7201b087d4fb62b9c42d47482406ad9b87dda7ca803274fceca26d7e20e899ba6b6c4a07f0af67ed5b12f7af72d311777dd99e276bda3e3b30e3d723e70d0bd4d6a53abdb02f1108319d6270ee9e5fe30c3782f31f33c6fafce71fb533609358ad970e7b6d35b8486700ba3e7d7f7bfb9fa2f13d5fe0edd83a8201837288d919c3da7ff967e0a1340233de4ee4bf2\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=4f6c06e462a7faa5f1f3a341ba115d84cdb8a674f58d9678f3c6b1e69c668259d144dbe59b6a73739758a0080fe7931e13026f0346df9eecb3f4f808deb199e7b2edcb7c6d3933e591b9689cad95fe85a02aabc98a59c80fcd695fc655f9501840eebbcf1ebf03433dc9370985599c14a65f8f9b763f14571977688d5da6e56b6979cec3200499dbd8a81fe58786558257e93dfc159a866878468a36cbb8abecce2c4845ab930862f5cd74dce148fd5103279361fc0343735cda980311222c0710236002648d6b6b735d68e2fded9e1c96501a8f080a2c4e913183e752b61912ea1ab3f09586ef1e2564744a5f5d477526b7eb49702c173752cd15c91b0a283d\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\"
,\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/purrfect-perch-cat-scratcher.png\"}\n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", " 2025-03-20 17:44:47+00:00\n", - " \n", - " \n", - " \n", - " {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\\"expiry_time\\\":\\\"2026-02-14T02:58:13Z\\\",\\\"read_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=ae50ee928cb422e14117cd4e6743c5501ec897036ee52e285ef18a65cb083f50c34fb7f7b859df5ef9cc05a522726cb89a86270a7b48a02082ec4de96fa351340b4b07bd5cd2b097aa2d86a9272ddb0ab61725041e7ddfeb5c3851df473ab3f9dbbcaa0956847f7da76ee2706b5b999e02c7d4058d66566573cbc9e15e6bf8c7ff92ef7470ad4730ab3214d33447a2fa29ac3f12909b9e68f77ab9e879b24b320d16ec7ebddd29cf482f79bfc62d06ea623868f6b79fc40b912efa78d81f1a8c3b368ca7f3b30881bc269e9a7ccc91ad312a41ff378c808e5ae9016711333708d1f87b090dce503ba37da918b0b0a776554375a9cf5be61a934c3f186e24da10\\\",\\\"write_url\\\":\\\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed_verbose%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260213%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260213T205813Z&X-Goog-Expires=21600&X-Goog-SignedHeaders=host&X-Goog-Signature=0046282ccd9e822e87f190d24935b0adab7f38ed6ecfd4c4c0b0c6c9049267684796306f0058e3d8aaf91c166500c25bed719d5953989262050faec7b4dd1f9d3e1c6
a71ecdcf45a0e92e61cf962010a2268770a8e75d68849ba98742c874f7e49034fedb01fc1e5d8f37321b36f66f386b6d1de964de10977f6df5896b47451219b65c4f9a254855216402bacd2f88ebdc473dd95502d51f1b8b2b198ea1b1bb845bcd0eefa498f5e6486457d10e4d636c864d7c184bb03a03f66407401e556897109caeda270b32792f2ddff82d8ce3841b77ad993209803f2abaae7e8f78a3c4cfa9eca0ef9d7b920b79419572bb1c794759ef87b8b90b718f6454b8c4f74\\\"},\\\"objectref\\\":{\\\"authorizer\\\":\\\"bigframes-dev.us.bigframes-default-connection\\\",\\\"uri\\\":\\\"gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png\\\"}}\"}\n", - " \n", + " \n", + " \n", + " \n", + " {\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/chirpy-seed-deluxe-bird-food.png\"}\n", + " \n", " \n", " \n", "\n", @@ -1146,11 +1266,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", "\n", " size updated \\\n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -1160,44 +1280,44 @@ "4 1591923 2025-03-20 17:44:47+00:00 \n", "\n", " blurred \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... 
\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", "\n", " resized \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", "\n", " normalized \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", "\n", " blurred_verbose \\\n", - "0 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... 
\n", - "1 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", - "2 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", - "3 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", - "4 {\"status\": \"\", \"content\": \"{\\\"access_urls\\\":{\\... \n", + "0 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", + "1 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", + "2 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", + "3 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", + "4 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", "\n", " blur_resized \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T02:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", "\n", "[5 rows x 10 columns]" ] }, - "execution_count": 25, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -1217,7 +1337,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 57, "metadata": { "id": "mRUGfcaFVW-3" }, @@ -1240,7 +1360,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 58, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1296,28 +1416,28 @@ " \n", " \n", " 0\n", - " Based on the image, the item is a tin of dog paw balm. 
It's labeled \"K9Guard Dog Paw Balm.\"\n", - " \n", + " The item is a container of K9Guard Dog Paw Balm.\n", + " \n", " \n", " \n", " 1\n", - " The item is a bottle of K9 Guard Dog Hot Spot Spray.\n", - " \n", + " The item is K9 Guard Dog Hot Spot Spray.\n", + " \n", " \n", " \n", " 2\n", - " The item is rabbit food or treats from \"Fluffy Buns\". There are three varieties: \"Timoth Hay Lend Variety Blend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\".\\n\n", - " \n", + " The items are different flavors of rabbit food or treats. They are labeled as \"Timoth Hay Lend Variety Blend,\" \"Herbal Greeis Mix Variety Blend,\" and \"Berry & Blossom Treat Blend,\" by the brand \"Fluffy Buns.\"\\n\n", + " \n", " \n", " \n", " 3\n", " The item is a cat tree.\\n\n", - " \n", + " \n", " \n", " \n", " 4\n", - " The item is a bag of \"Chirpy Seed\" Deluxe Bird Food.\n", - " \n", + " The item is a bag of bird seed. Specifically, it's \"Chirpy Seed\", described as \"Deluxe Bird Food\" and a \"Premium Blend\".\\n\n", + " \n", " \n", " \n", "\n", @@ -1326,23 +1446,23 @@ ], "text/plain": [ " ml_generate_text_llm_result \\\n", - "0 Based on the image, the item is a tin of dog p... \n", - "1 The item is a bottle of K9 Guard Dog Hot Spot ... \n", - "2 The item is rabbit food or treats from \"Fluffy... \n", + "0 The item is a container of K9Guard Dog Paw Balm. \n", + "1 The item is K9 Guard Dog Hot Spot Spray. \n", + "2 The items are different flavors of rabbit food... \n", "3 The item is a cat tree.\\n \n", - "4 The item is a bag of \"Chirpy Seed\" Deluxe Bird... \n", + "4 The item is a bag of bird seed. Specifically, ... \n", "\n", " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... 
\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 27, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -1355,7 +1475,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 59, "metadata": { "id": "IG3J3HsKhyBY" }, @@ -1373,7 +1493,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 60, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1429,28 +1549,28 @@ " \n", " \n", " 0\n", - " The item is a tin of K9 Guard Dog Paw Balm.\n", - " \n", + " The item is a tin of dog paw balm. Specifically, it is \"K9 Guard Dog Paw Balm\".\n", + " \n", " \n", " \n", " 1\n", - " The picture has multiple colors, including white, light blue, black, and green. The background is a light gray.\\n\n", - " \n", + " The picture has a white bottle with a blue spray nozzle and accents. The background is gray.\n", + " \n", " \n", " \n", " 2\n", - " Here are the three product names that are visible in the image:\\n\\n1. **Timothy Hay Blend Variety Blend**\\n2. **Herbal Greens Mix Variety Blend**\\n3. 
**Berry & Blossom Treat Blend**\n", - " \n", + " Here are the product names based on the image:\\n\\n* **Timoth Hay Lend Variety Plend**\\n* **Herbal Greeıs Mix Variety Blend**\\n* **Berry & Blossom Treat Blend**\n", + " \n", " \n", " \n", " 3\n", - " Yes, the item in the image is a cat tree, which is a type of furniture designed for pets, specifically cats.\n", - " \n", + " Yes, the item in the image appears to be a cat tree, which is a piece of furniture specifically designed for cats.\\n\n", + " \n", " \n", " \n", " 4\n", - " The net weight of the product is 15 oz or 257g.\n", - " \n", + " The weight of the product is 15 oz (257g).\n", + " \n", " \n", " \n", "\n", @@ -1459,23 +1579,23 @@ ], "text/plain": [ " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9 Guard Dog Paw Balm. \n", - "1 The picture has multiple colors, including whi... \n", - "2 Here are the three product names that are visi... \n", - "3 Yes, the item in the image is a cat tree, whic... \n", - "4 The net weight of the product is 15 oz or 257g. \n", + "0 The item is a tin of dog paw balm. Specificall... \n", + "1 The picture has a white bottle with a blue spr... \n", + "2 Here are the product names based on the image:... \n", + "3 Yes, the item in the image appears to be a cat... \n", + "4 The weight of the product is 15 oz (257g). \n", "\n", " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... 
\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 29, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -1487,7 +1607,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 61, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1548,43 +1668,43 @@ " \n", " \n", " 0\n", - " [ 0.00638822 0.01666385 0.00451817 ... -0.02...\n", + " [ 0.00638615 0.01666428 0.00452227 ... -0.02...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", " \n", " \n", " 1\n", - " [ 0.00973689 0.02148374 0.00244311 ... 0.00...\n", + " [ 0.00973672 0.02148364 0.00244308 ... 0.00...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", " \n", " \n", " 2\n", - " [ 0.01197331 0.02138491 0.05967776 ... -0.01...\n", + " [ 0.01197349 0.02138474 0.05967783 ... -0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", " \n", " \n", " 3\n", - " [-0.02621007 0.02797794 0.04416854 ... -0.01...\n", + " [-0.02621164 0.02797647 0.04416908 ... -0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", " \n", " \n", " 4\n", - " [ 0.05918613 0.01251376 0.01907326 ... 0.01...\n", + " [ 0.05918628 0.0125137 0.01907336 ... 0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", " \n", " \n", "\n", @@ -1593,11 +1713,11 @@ ], "text/plain": [ " ml_generate_embedding_result \\\n", - "0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... 
\n", - "1 [ 0.00973689 0.02148374 0.00244311 ... 0.00... \n", - "2 [ 0.01197331 0.02138491 0.05967776 ... -0.01... \n", - "3 [-0.02621007 0.02797794 0.04416854 ... -0.01... \n", - "4 [ 0.05918613 0.01251376 0.01907326 ... 0.01... \n", + "0 [ 0.00638615 0.01666428 0.00452227 ... -0.02... \n", + "1 [ 0.00973672 0.02148364 0.00244308 ... 0.00... \n", + "2 [ 0.01197349 0.02138474 0.05967783 ... -0.01... \n", + "3 [-0.02621164 0.02797647 0.04416908 ... -0.01... \n", + "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", "0 \n", @@ -1614,16 +1734,16 @@ "4 \n", "\n", " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-14T03:0... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... 
\n", "\n", "[5 rows x 5 columns]" ] }, - "execution_count": 30, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -1648,7 +1768,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 62, "metadata": {}, "outputs": [ { @@ -1738,7 +1858,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 63, "metadata": {}, "outputs": [ { @@ -1787,7 +1907,7 @@ "[1 rows x 2 columns]" ] }, - "execution_count": 32, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -1809,7 +1929,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -1834,7 +1954,7 @@ "Name: chunked, dtype: string" ] }, - "execution_count": 33, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -1854,7 +1974,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -1864,7 +1984,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1887,7 +2007,7 @@ "Name: transcribed_content, dtype: string" ] }, - "execution_count": 35, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -1896,7 +2016,7 @@ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", "\n", - "audio_series = df['audio']\n", + "audio_series = df[\"audio\"]\n", "prompt_text = (\n", " \"**Task:** Transcribe the provided audio. **Instructions:** - Your response \"\n", " \"must contain only the verbatim transcription of the audio. 
- Do not include \"\n", @@ -1921,7 +2041,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 67, "metadata": {}, "outputs": [ { @@ -1934,7 +2054,7 @@ "Name: transcription_results, dtype: struct[pyarrow]" ] }, - "execution_count": 36, + "execution_count": 67, "metadata": {}, "output_type": "execute_result" } @@ -1971,7 +2091,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -2022,7 +2142,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -2044,7 +2164,7 @@ "Name: blob_col, dtype: extension>[pyarrow]" ] }, - "execution_count": 38, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } @@ -2076,7 +2196,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "venv", + "display_name": "venv (3.13.0)", "language": "python", "name": "python3" }, From 633c82968ecdfb5c4ab2fe88a1ba39923500f55f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 20 Feb 2026 03:36:26 +0000 Subject: [PATCH 5/8] fix: restore retry logic in multimodal udf functions --- .../multimodal/multimodal_dataframe.ipynb | 1029 ++++------------- 1 file changed, 233 insertions(+), 796 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index aec56f05fb..1d63290b5d 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 40, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -130,7 +130,7 @@ }, { "cell_type": "code", - 
"execution_count": 43, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -171,37 +171,7 @@ " return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n", "\n", "def get_updated(series):\n", - " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n", - "\n", - "def display_blob(series, n=3):\n", - " import IPython.display as ipy_display\n", - " import pandas as pd\n", - " import requests\n", - " \n", - " # Retrieve access URLs and content types\n", - " runtime_json = bbq.to_json_string(bbq.obj.get_access_url(series, mode=\"R\"))\n", - " read_url = bbq.json_value(runtime_json, \"$.access_urls.read_url\")\n", - " content_type = get_content_type(series)\n", - " \n", - " # Pull to pandas to display\n", - " pdf = bpd.DataFrame({\"read_url\": read_url, \"content_type\": content_type}).head(n).to_pandas()\n", - " \n", - " width = bigframes.options.display.blob_display_width\n", - " height = bigframes.options.display.blob_display_height\n", - " \n", - " for _, row in pdf.iterrows():\n", - " if pd.isna(row[\"read_url\"]):\n", - " ipy_display.display(\"\")\n", - " elif pd.isna(row[\"content_type\"]):\n", - " ipy_display.display(requests.get(row[\"read_url\"]).content)\n", - " elif row[\"content_type\"].casefold().startswith(\"image\"):\n", - " ipy_display.display(ipy_display.Image(url=row[\"read_url\"], width=width, height=height))\n", - " elif row[\"content_type\"].casefold().startswith(\"audio\"):\n", - " ipy_display.display(ipy_display.Audio(requests.get(row[\"read_url\"]).content))\n", - " elif row[\"content_type\"].casefold().startswith(\"video\"):\n", - " ipy_display.display(ipy_display.Video(row[\"read_url\"], width=width, height=height))\n", - " else:\n", - " ipy_display.display(requests.get(row[\"read_url\"]).content)" + " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ] }, { @@ -216,7 +186,7 @@ 
}, { "cell_type": "code", - "execution_count": 44, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -229,15 +199,12 @@ "# Create blob columns from wildcard path.\n", "df_image = bpd.from_glob_path(\n", " \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n", - ")\n", - "\n", - "# From an existing object table\n", - "# df = bpd.read_gbq_object_table(\"\", name=\"blob_col\")" + ")" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -286,23 +253,23 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " \n", " \n", "\n", @@ -311,16 +278,16 @@ ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 45, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -351,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 7, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -399,7 +366,7 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", @@ -407,7 +374,7 @@ " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 
1182951\n", @@ -415,7 +382,7 @@ " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", @@ -423,7 +390,7 @@ " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", @@ -431,7 +398,7 @@ " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", @@ -444,11 +411,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... bob image/png \n", "\n", " size updated \n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -460,7 +427,7 @@ "[5 rows x 5 columns]" ] }, - "execution_count": 46, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -475,57 +442,6 @@ "df_image" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "NUd4Kog_QLRS" - }, - "source": [ - "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together." 
- ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 75 - }, - "id": "UGuAk9PNDRF3", - "outputId": "73feb33d-4a05-48fb-96e5-3c48c2a456f3" - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# filter images and display, you can also display audio and video types\n", - "display_blob(df_image[df_image[\"author\"] == \"alice\"][\"image\"])" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -542,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -558,257 +474,16 @@ "text": [ "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is 
in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" - ] - } - ], - "source": [ - "# Construct the canonical connection ID\n", - "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", - "\n", - "@bpd.udf(\n", - " input_types=[str, str, str, int, int, bool],\n", - " output_type=str,\n", - " dataset=DATASET_ID,\n", - " name=\"image_blur\",\n", - " bigquery_connection=FULL_CONNECTION_ID,\n", - " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", - ")\n", - "def image_blur(\n", - " src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool\n", - ") -> str:\n", - " import json\n", - " import cv2 as cv\n", - " import numpy as np\n", - " import requests\n", - " import base64\n", - " from requests import adapters\n", - "\n", - " try: \n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", - " src_obj = json.loads(src_rt)\n", - " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", - " \n", - " response = session.get(src_url, timeout=30)\n", - " response.raise_for_status()\n", - " \n", - " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", - " if img is None:\n", - " raise ValueError(\"cv.imdecode failed\")\n", - " \n", - " kx, ky = int(kx), int(ky)\n", - " img_blurred = cv.blur(img, ksize=(kx, ky))\n", - " \n", - " ext = ext or \".jpeg\"\n", - " success, encoded = cv.imencode(ext, img_blurred)\n", - " if not success:\n", - " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", - " \n", - " # Handle two output modes\n", - " if dst_rt: # GCS/Series output mode\n", - " dst_obj = json.loads(dst_rt)\n", - " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", - " \n", - " ext_ct = ext.replace(\".\", \"\")\n", - " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", - " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", - " content_type = \"image/\" + ext_ct\n", - " \n", - " session.put(dst_url, 
data=encoded.tobytes(), headers={\"Content-Type\": content_type}, timeout=30).raise_for_status()\n", - " \n", - " # Extract URI for OBJ.MAKE_REF compatibility \n", - " uri = dst_obj[\"objectref\"][\"uri\"]\n", - " \n", - " if verbose:\n", - " return json.dumps({\"status\": \"\", \"content\": uri})\n", - " else:\n", - " return uri # ReturN simple URI string, not full JSON\n", - " \n", - " else: # BigQuery bytes output mode \n", - " image_bytes = encoded.tobytes()\n", - " if verbose:\n", - " result = {\n", - " \"status\": \"\",\n", - " \"content\": base64.b64encode(image_bytes).decode(),\n", - " \"content_type\": f\"image/{ext_ct}\" if 'ext_ct' in locals() else \"image/jpeg\"\n", - " }\n", - " return json.dumps(result)\n", - " else: \n", - " return base64.b64encode(image_bytes).decode()\n", - " \n", - " except Exception as e: \n", - " if verbose: \n", - " return json.dumps({\"status\": str(e), \"content\": \"\"}) \n", - " raise e\n", - "\n", - "@bpd.udf(\n", - " input_types=[str, str, str, int, int, float, float, bool],\n", - " output_type=str,\n", - " dataset=DATASET_ID,\n", - " name=\"image_resize\",\n", - " bigquery_connection=FULL_CONNECTION_ID,\n", - " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", - ")\n", - "def image_resize(\n", - " src_rt: str, dst_rt: str, ext: str, dx: int, dy: int,\n", - " fx: float, fy: float, verbose: bool\n", - ") -> str:\n", - " import json\n", - " import cv2 as cv\n", - " import numpy as np\n", - " import requests\n", - " from requests import adapters\n", - " try:\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", - " src_obj, dst_obj = json.loads(src_rt), json.loads(dst_rt)\n", - " src_url, dst_url = src_obj[\"access_urls\"][\"read_url\"], dst_obj[\"access_urls\"][\"write_url\"]\n", - " response = session.get(src_url, timeout=30)\n", - " response.raise_for_status()\n", - " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", - 
" if img is None:\n", - " raise ValueError(\"cv.imdecode failed\")\n", - " img_resized = cv.resize(img, dsize=(dx, dy), fx=fx, fy=fy)\n", - " ext = ext or \".jpeg\"\n", - " success, encoded = cv.imencode(ext, img_resized)\n", - " if not success:\n", - " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", - " ext_ct = ext.replace(\".\", \"\").lower()\n", - " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", - " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", - " content_type = \"image/\" + ext_ct\n", - " session.put(\n", - " dst_url, data=encoded.tobytes(),\n", - " headers={\"Content-Type\": content_type},\n", - " timeout=30\n", - " ).raise_for_status()\n", - " return json.dumps(\n", - " {\"status\": \"\", \"content\": dst_rt}\n", - " ) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", - " except Exception as e:\n", - " if verbose:\n", - " return json.dumps({\"status\": str(e), \"content\": \"\"})\n", - " raise e\n", - "\n", - "@bpd.udf(\n", - " input_types=[str, str, str, float, float, str, bool],\n", - " output_type=str,\n", - " dataset=DATASET_ID,\n", - " name=\"image_normalize\",\n", - " bigquery_connection=FULL_CONNECTION_ID,\n", - " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", - ")\n", - "def image_normalize(\n", - " src_rt: str, dst_rt: str, ext: str, alpha: float,\n", - " beta: float, norm_type: str, verbose: bool\n", - ") -> str:\n", - " import json\n", - " import cv2 as cv\n", - " import numpy as np\n", - " import requests\n", - " from requests import adapters\n", - " try:\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", - " src_obj, dst_obj = json.loads(src_rt), json.loads(dst_rt)\n", - " src_url, dst_url = src_obj[\"access_urls\"][\"read_url\"], dst_obj[\"access_urls\"][\"write_url\"]\n", - " response = session.get(src_url, timeout=30)\n", - " response.raise_for_status()\n", - " img = cv.imdecode(np.frombuffer(response.content, np.uint8), 
cv.IMREAD_UNCHANGED)\n", - " if img is None:\n", - " raise ValueError(\"cv.imdecode failed\")\n", - " norm_map = {\"inf\": cv.NORM_INF, \"l1\": cv.NORM_L1, \"l2\": cv.NORM_L2, \"minmax\": cv.NORM_MINMAX}\n", - " img_normalized = cv.normalize(img, None, alpha=alpha, beta=beta, norm_type=norm_map[norm_type])\n", - " ext = ext or \".jpeg\"\n", - " success, encoded = cv.imencode(ext, img_normalized)\n", - " if not success:\n", - " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", - " ext_ct = ext.replace(\".\", \"\").lower()\n", - " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", - " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", - " content_type = \"image/\" + ext_ct\n", - " session.put(\n", - " dst_url, data=encoded.tobytes(),\n", - " headers={\"Content-Type\": content_type}, timeout=30\n", - " ).raise_for_status()\n", - " return json.dumps(\n", - " {\"status\": \"\", \"content\": dst_rt}\n", - " ) if verbose else dst_obj[\"objectref\"][\"uri\"]\n", - " except Exception as e:\n", - " if verbose:\n", - " return json.dumps({\"status\": str(e), \"content\": \"\"})\n", - " raise e\n", - "\n", - "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", - " import os\n", - " dst_folder = os.path.join(dst_folder, \"\")\n", - " # Fetch metadata to get the URI\n", - " metadata = bbq.obj.fetch_metadata(series)\n", - " current_uri = metadata.struct.field(\"uri\")\n", - " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", - " dst_blob = dst_uri.str.to_blob(connection=FULL_CONNECTION_ID)\n", - " df_transform = bpd.DataFrame({\n", - " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", - " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", - " \"ext\": dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n", - " })\n", - " res = df_transform[[\"src_rt\", \"dst_rt\", \"ext\"]].apply(\n", - " udf, axis=1, args=(*args, verbose)\n", - " )\n", - " return res if verbose else 
res.str.to_blob(connection=FULL_CONNECTION_ID)\n", - "\n", - "# Apply transformations\n", - "df_image[\"blurred\"] = apply_transformation(\n", - " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_v2/\",\n", - " image_blur, 20, 20\n", - ")\n", - "df_image[\"resized\"] = apply_transformation(\n", - " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed_v2/\",\n", - " image_resize, 300, 200, 0.0, 0.0\n", - ")\n", - "df_image[\"normalized\"] = apply_transformation(\n", - " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed_v2/\",\n", - " image_normalize, 50.0, 150.0, \"minmax\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b6RRZb3qPi_T" - }, - "source": [ - "### You can also chain functions together" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4YJCdmLtR-qu" - }, - "source": [ - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." 
- ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "id": "YYYVn7NDH0Me" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { @@ -832,187 +507,156 @@ " \n", " \n", " \n", - " blurred_verbose\n", + " image\n", + " blurred\n", " \n", " \n", " \n", " \n", " 0\n", - " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + " \n", + " \n", " \n", " \n", " 1\n", - " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + " \n", + " \n", " \n", " \n", " 2\n", - " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + " \n", + " \n", " \n", " \n", " 3\n", - " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + " \n", + " \n", " \n", " \n", " 4\n", - " {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + " \n", + " \n", " \n", " \n", "\n", - "

5 rows × 1 columns

\n", - "[5 rows x 1 columns in total]" + "

5 rows × 2 columns

\n", + "[5 rows x 2 columns in total]" ], "text/plain": [ - " blurred_verbose\n", - "0 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "1 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "2 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "3 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "4 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", "\n", - "[5 rows x 1 columns]" + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... 
\n", + "\n", + "[5 rows x 2 columns]" ] }, - "execution_count": 49, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_image[\"blurred_verbose\"] = apply_transformation(\n", - " df_image[\"image\"],\n", - " f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose_v2/\",\n", - " image_blur, 20, 20, verbose=True\n", + "# Construct the canonical connection ID\n", + "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", + "\n", + "@bpd.udf(\n", + " input_types=[str, str, str, int, int, bool],\n", + " output_type=str,\n", + " dataset=DATASET_ID,\n", + " name=\"image_blur\",\n", + " bigquery_connection=FULL_CONNECTION_ID,\n", + " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "df_image[[\"blurred_verbose\"]]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NUd4Kog_QLRS" - }, - "source": [ - "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together." - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 75 - }, - "id": "UGuAk9PNDRF3", - "outputId": "73feb33d-4a05-48fb-96e5-3c48c2a456f3" - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# filter images and display, you can also display audio and video types\n", - "display_blob(df_image[df_image[\"author\"] == \"alice\"][\"image\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1IJuakwJTZey" - }, - "source": [ - "### 3. Conduct image transformations\n", - "BigFrames Multimodal DataFrame provides image(and other) transformation functions. 
Such as image_blur, image_resize and image_normalize. The output can be saved to GCS folders or to BQ as bytes." - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VWsl5BBPJ6N7", - "outputId": "45d2356e-322b-4982-cfa7-42d034dc4344" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" - ] - } - ], - "source": [ + "def image_blur(\n", + " src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool\n", + ") -> str:\n", + " import json\n", + " import cv2 as cv\n", + " import numpy as np\n", + " import requests\n", + " import base64\n", + " from requests import adapters\n", + "\n", + " session = requests.Session()\n", + " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", + " src_obj = json.loads(src_rt)\n", + " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", + " \n", + " response = session.get(src_url, timeout=30)\n", + " response.raise_for_status()\n", + " \n", + " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " if img is None:\n", + " raise ValueError(\"cv.imdecode failed\")\n", + " \n", + " kx, ky = 
int(kx), int(ky)\n", + " img_blurred = cv.blur(img, ksize=(kx, ky))\n", + " \n", + " ext = ext or \".jpeg\"\n", + " success, encoded = cv.imencode(ext, img_blurred)\n", + " if not success:\n", + " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", + " \n", + " # Handle two output modes\n", + " if dst_rt: # GCS/Series output mode\n", + " dst_obj = json.loads(dst_rt)\n", + " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", + " \n", + " ext_ct = ext.replace(\".\", \"\")\n", + " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", + " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", + " content_type = \"image/\" + ext_ct\n", + " \n", + " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": content_type}, timeout=30).raise_for_status()\n", + " \n", + " uri = dst_obj[\"objectref\"][\"uri\"]\n", + " return json.dumps({\"status\": \"\", \"content\": uri}) if verbose else uri\n", + " \n", + " else: # BigQuery bytes output mode \n", + " image_bytes = encoded.tobytes()\n", + " if verbose:\n", + " return json.dumps({\n", + " \"status\": \"\",\n", + " \"content\": base64.b64encode(image_bytes).decode(),\n", + " \"content_type\": f\"image/{ext_ct}\" if 'ext_ct' in locals() else \"image/jpeg\"\n", + " })\n", + " else: \n", + " return base64.b64encode(image_bytes).decode()\n", + "\n", + "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", + " import os\n", + " dst_folder = os.path.join(dst_folder, \"\")\n", + " # Fetch metadata to get the URI\n", + " metadata = bbq.obj.fetch_metadata(series)\n", + " current_uri = metadata.struct.field(\"uri\")\n", + " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", + " dst_blob = dst_uri.str.to_blob(connection=FULL_CONNECTION_ID)\n", + " df_transform = bpd.DataFrame({\n", + " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", + " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", + " \"ext\": 
dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n", + " })\n", + " res = df_transform[[\"src_rt\", \"dst_rt\", \"ext\"]].apply(\n", + " udf, axis=1, args=(*args, verbose)\n", + " )\n", + " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", + "\n", + "# Apply transformations\n", "df_image[\"blurred\"] = apply_transformation(\n", " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n", " image_blur, 20, 20\n", ")\n", - "df_image[\"resized\"] = apply_transformation(\n", - " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\",\n", - " image_resize, 300, 200, 0.0, 0.0\n", - ")\n", - "df_image[\"normalized\"] = apply_transformation(\n", - " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n", - " image_normalize, 50.0, 150.0, \"minmax\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "rWCAGC8w64vU", - "outputId": "d7d456f0-8b56-492c-fe1b-967e9664d813" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" - ] - } - ], - "source": [ - "# You can also chain functions together\n", - "df_image[\"blur_resized\"] = apply_transformation(\n", - " df_image[\"blurred\"],\n", - " f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed_v2/\",\n", - " image_resize, 300, 200, 0.0, 0.0\n", - ")" + "df_image[[\"image\", \"blurred\"]]" ] }, { @@ -1033,14 +677,14 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - 
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4735: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" ] }, @@ -1105,7 +749,7 @@ "[5 rows x 1 columns]" ] }, - "execution_count": 53, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1119,213 +763,6 @@ "df_image[[\"blurred_verbose\"]]" ] }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "df_image = df_image._cached()" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 605 - }, - "id": "6NGK6GYSU44B", - "outputId": "859101c1-2ee4-4f9a-e250-e8947127420a" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdatedblurredresizednormalizedblurred_verboseblur_resized
0aliceimage/png15912402025-03-20 17:45:04+00:00{\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/k9-guard-dog-paw-balm.png\"}
1bobimage/png11829512025-03-20 17:45:02+00:00{\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/k9-guard-dog-hot-spot-spray.png\"}
2bobimage/png15208842025-03-20 17:44:55+00:00{\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/fluffy-buns-chinchilla-food-variety-pack.png\"}
3aliceimage/png12354012025-03-20 17:45:19+00:00{\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/purrfect-perch-cat-scratcher.png\"}
4bobimage/png15919232025-03-20 17:44:47+00:00{\"status\": \"\", \"content\": \"gs://bigframes_blob_test/image_blur_transformed_verbose_v2/chirpy-seed-deluxe-bird-food.png\"}
\n", - "

5 rows × 10 columns

\n", - "
[5 rows x 10 columns in total]" - ], - "text/plain": [ - " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... bob image/png \n", - "\n", - " size updated \\\n", - "0 1591240 2025-03-20 17:45:04+00:00 \n", - "1 1182951 2025-03-20 17:45:02+00:00 \n", - "2 1520884 2025-03-20 17:44:55+00:00 \n", - "3 1235401 2025-03-20 17:45:19+00:00 \n", - "4 1591923 2025-03-20 17:44:47+00:00 \n", - "\n", - " blurred \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "\n", - " resized \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "\n", - " normalized \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "\n", - " blurred_verbose \\\n", - "0 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", - "1 {\"status\": \"\", \"content\": \"gs://bigframes_blob... 
\n", - "2 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", - "3 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", - "4 {\"status\": \"\", \"content\": \"gs://bigframes_blob... \n", - "\n", - " blur_resized \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "\n", - "[5 rows x 10 columns]" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_image" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1337,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 10, "metadata": { "id": "mRUGfcaFVW-3" }, @@ -1360,7 +797,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1416,28 +853,28 @@ " \n", " \n", " 0\n", - " The item is a container of K9Guard Dog Paw Balm.\n", - " \n", + " The item is a tin of K9Guard Dog Paw Balm.\n", + " \n", " \n", " \n", " 1\n", " The item is K9 Guard Dog Hot Spot Spray.\n", - " \n", + " \n", " \n", " \n", " 2\n", - " The items are different flavors of rabbit food or treats. They are labeled as \"Timoth Hay Lend Variety Blend,\" \"Herbal Greeis Mix Variety Blend,\" and \"Berry & Blossom Treat Blend,\" by the brand \"Fluffy Buns.\"\\n\n", - " \n", + " The items are pet food or treats, specifically for rabbits or other small pets. The three varieties are: Timothy Hay Blend, Herbal Greens Mix, and Berry & Blossom Treat Blend. All three are by \"Fluffy Buns\".\n", + " \n", " \n", " \n", " 3\n", - " The item is a cat tree.\\n\n", - " \n", + " The item is a cat tree.\n", + " \n", " \n", " \n", " 4\n", - " The item is a bag of bird seed. 
Specifically, it's \"Chirpy Seed\", described as \"Deluxe Bird Food\" and a \"Premium Blend\".\\n\n", - " \n", + " The item is a bag of bird seed. Specifically, it is a bag of \"Chirpy Seed\" deluxe bird food.\n", + " \n", " \n", " \n", "\n", @@ -1446,23 +883,23 @@ ], "text/plain": [ " ml_generate_text_llm_result \\\n", - "0 The item is a container of K9Guard Dog Paw Balm. \n", + "0 The item is a tin of K9Guard Dog Paw Balm. \n", "1 The item is K9 Guard Dog Hot Spot Spray. \n", - "2 The items are different flavors of rabbit food... \n", - "3 The item is a cat tree.\\n \n", + "2 The items are pet food or treats, specifically... \n", + "3 The item is a cat tree. \n", "4 The item is a bag of bird seed. Specifically, ... \n", "\n", " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 58, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -1475,7 +912,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 12, "metadata": { "id": "IG3J3HsKhyBY" }, @@ -1493,7 +930,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1549,28 +986,28 @@ " \n", " \n", " 0\n", - " The item is a tin of dog paw balm. 
Specifically, it is \"K9 Guard Dog Paw Balm\".\n", - " \n", + " The item is a K9 Guard Dog Paw Balm.\n", + " \n", " \n", " \n", " 1\n", - " The picture has a white bottle with a blue spray nozzle and accents. The background is gray.\n", - " \n", + " The picture has multiple colors. The bottle itself is primarily white with teal accents, while other elements like the text and aloe vera illustration introduce green, black, and gray. The background is a neutral gray.\n", + " \n", " \n", " \n", " 2\n", - " Here are the product names based on the image:\\n\\n* **Timoth Hay Lend Variety Plend**\\n* **Herbal Greeıs Mix Variety Blend**\\n* **Berry & Blossom Treat Blend**\n", - " \n", + " Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend**\\n* **Herbal Greeıs Mix Variety Blend**\\n* **Berry & Blossom Treat Blend**\n", + " \n", " \n", " \n", " 3\n", - " Yes, the item in the image appears to be a cat tree, which is a piece of furniture specifically designed for cats.\\n\n", - " \n", + " Yes, the object in the image is a cat tree, which is a product designed for pets, specifically cats.\n", + " \n", " \n", " \n", " 4\n", " The weight of the product is 15 oz (257g).\n", - " \n", + " \n", " \n", " \n", "\n", @@ -1579,23 +1016,23 @@ ], "text/plain": [ " ml_generate_text_llm_result \\\n", - "0 The item is a tin of dog paw balm. Specificall... \n", - "1 The picture has a white bottle with a blue spr... \n", - "2 Here are the product names based on the image:... \n", - "3 Yes, the item in the image appears to be a cat... \n", + "0 The item is a K9 Guard Dog Paw Balm. \n", + "1 The picture has multiple colors. The bottle it... \n", + "2 Here are the product names from the image:\\n\\n... \n", + "3 Yes, the object in the image is a cat tree, wh... \n", "4 The weight of the product is 15 oz (257g). \n", "\n", " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... 
\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 60, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1607,7 +1044,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1668,19 +1105,19 @@ " \n", " \n", " 0\n", - " [ 0.00638615 0.01666428 0.00452227 ... -0.02...\n", + " [ 0.00638822 0.01666385 0.00451817 ... -0.02...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", " \n", " \n", " 1\n", - " [ 0.00973672 0.02148364 0.00244308 ... 0.00...\n", + " [ 0.00973689 0.02148374 0.00244311 ... 0.00...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", " \n", " \n", " 2\n", @@ -1688,23 +1125,23 @@ " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", " \n", " \n", " 3\n", - " [-0.02621164 0.02797647 0.04416908 ... -0.01...\n", + " [-0.02621159 0.02797647 0.04416922 ... -0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", " \n", " \n", " 4\n", - " [ 0.05918628 0.0125137 0.01907336 ... 
0.01...\n", + " [ 0.05918641 0.01251377 0.01907265 ... 0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", " \n", " \n", "\n", @@ -1713,11 +1150,11 @@ ], "text/plain": [ " ml_generate_embedding_result \\\n", - "0 [ 0.00638615 0.01666428 0.00452227 ... -0.02... \n", - "1 [ 0.00973672 0.02148364 0.00244308 ... 0.00... \n", + "0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n", + "1 [ 0.00973689 0.02148374 0.00244311 ... 0.00... \n", "2 [ 0.01197349 0.02138474 0.05967783 ... -0.01... \n", - "3 [-0.02621164 0.02797647 0.04416908 ... -0.01... \n", - "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", + "3 [-0.02621159 0.02797647 0.04416922 ... -0.01... \n", + "4 [ 0.05918641 0.01251377 0.01907265 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", "0 \n", @@ -1734,16 +1171,16 @@ "4 \n", "\n", " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-19T03:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... 
\n", "\n", "[5 rows x 5 columns]" ] }, - "execution_count": 61, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1768,7 +1205,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1858,7 +1295,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1907,7 +1344,7 @@ "[1 rows x 2 columns]" ] }, - "execution_count": 63, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1929,7 +1366,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1954,7 +1391,7 @@ "Name: chunked, dtype: string" ] }, - "execution_count": 64, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1974,7 +1411,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -1984,7 +1421,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -2007,7 +1444,7 @@ "Name: transcribed_content, dtype: string" ] }, - "execution_count": 66, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -2041,7 +1478,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -2054,7 +1491,7 @@ "Name: transcription_results, dtype: struct[pyarrow]" ] }, - "execution_count": 67, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2091,7 +1528,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -2142,7 +1579,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -2164,7 +1601,7 @@ "Name: blob_col, dtype: extension>[pyarrow]" ] }, - "execution_count": 69, + "execution_count": 22, "metadata": {}, "output_type": 
"execute_result" } From 53a0bea21abd690e3da40a63ae3e97e3a5fdd6b9 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 20 Feb 2026 19:18:04 +0000 Subject: [PATCH 6/8] refactor: simplify image_blur udf --- .../multimodal/multimodal_dataframe.ipynb | 138 ++---------------- 1 file changed, 9 insertions(+), 129 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 1d63290b5d..1250edaba5 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -570,16 +570,14 @@ "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", "\n", "@bpd.udf(\n", - " input_types=[str, str, str, int, int, bool],\n", + " input_types=[str, str, int, int],\n", " output_type=str,\n", " dataset=DATASET_ID,\n", " name=\"image_blur\",\n", " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(\n", - " src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool\n", - ") -> str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -602,36 +600,23 @@ " kx, ky = int(kx), int(ky)\n", " img_blurred = cv.blur(img, ksize=(kx, ky))\n", " \n", - " ext = ext or \".jpeg\"\n", - " success, encoded = cv.imencode(ext, img_blurred)\n", + " success, encoded = cv.imencode(\".jpeg\", img_blurred)\n", " if not success:\n", - " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", + " raise ValueError(\"cv.imencode failed\")\n", " \n", " # Handle two output modes\n", " if dst_rt: # GCS/Series output mode\n", " dst_obj = json.loads(dst_rt)\n", " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", " \n", - " ext_ct = ext.replace(\".\", \"\")\n", - " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", - " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", - " content_type = 
\"image/\" + ext_ct\n", - " \n", - " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": content_type}, timeout=30).raise_for_status()\n", + " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", " \n", " uri = dst_obj[\"objectref\"][\"uri\"]\n", - " return json.dumps({\"status\": \"\", \"content\": uri}) if verbose else uri\n", + " return uri\n", " \n", " else: # BigQuery bytes output mode \n", " image_bytes = encoded.tobytes()\n", - " if verbose:\n", - " return json.dumps({\n", - " \"status\": \"\",\n", - " \"content\": base64.b64encode(image_bytes).decode(),\n", - " \"content_type\": f\"image/{ext_ct}\" if 'ext_ct' in locals() else \"image/jpeg\"\n", - " })\n", - " else: \n", - " return base64.b64encode(image_bytes).decode()\n", + " return base64.b64encode(image_bytes).decode()\n", "\n", "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", " import os\n", @@ -644,10 +629,9 @@ " df_transform = bpd.DataFrame({\n", " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", - " \"ext\": dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n", " })\n", - " res = df_transform[[\"src_rt\", \"dst_rt\", \"ext\"]].apply(\n", - " udf, axis=1, args=(*args, verbose)\n", + " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n", + " udf, axis=1, args=(*args)\n", " )\n", " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", "\n", @@ -659,110 +643,6 @@ "df_image[[\"image\", \"blurred\"]]" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using `verbose` mode for detailed output\n", - "\n", - "All multimodal functions support a `verbose` parameter, which defaults to `False`.\n", - "\n", - "* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\n", - "* When 
`verbose=True`, the function returns a `STRUCT` containing two fields:\n", - " * `content`: The main result of the operation.\n", - " * `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n", - "\n", - "Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
blurred_verbose
0{\"status\": \"\", \"content\": \"gs://bigframes_blob...
1{\"status\": \"\", \"content\": \"gs://bigframes_blob...
2{\"status\": \"\", \"content\": \"gs://bigframes_blob...
3{\"status\": \"\", \"content\": \"gs://bigframes_blob...
4{\"status\": \"\", \"content\": \"gs://bigframes_blob...
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " blurred_verbose\n", - "0 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "1 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "2 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "3 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "4 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_image[\"blurred_verbose\"] = apply_transformation(\n", - " df_image[\"image\"],\n", - " f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose_v2/\",\n", - " image_blur, 20, 20, verbose=True\n", - ")\n", - "df_image[[\"blurred_verbose\"]]" - ] - }, { "cell_type": "markdown", "metadata": { From 4afe9edb5e81c9dff6fd75b484b935ae94216a48 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 20 Feb 2026 19:28:34 +0000 Subject: [PATCH 7/8] docs: remove ext and verbose --- .../multimodal/multimodal_dataframe.ipynb | 222 ++++++------------ 1 file changed, 69 insertions(+), 153 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 1250edaba5..dd0f04c0d7 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -186,7 +186,7 @@ }, { 
"cell_type": "code", - "execution_count": 5, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -204,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -218,7 +218,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", @@ -253,23 +253,23 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - " \n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " \n", " \n", "\n", @@ -278,16 +278,16 @@ ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 6, + "execution_count": 16, "metadata": {}, "output_type": 
"execute_result" } @@ -318,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 17, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -327,7 +327,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", @@ -366,7 +366,7 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", @@ -374,7 +374,7 @@ " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", @@ -382,7 +382,7 @@ " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", @@ -390,7 +390,7 @@ " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", @@ -398,7 +398,7 @@ " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", @@ -411,11 +411,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... 
bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... bob image/png \n", "\n", " size updated \n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -427,7 +427,7 @@ "[5 rows x 5 columns]" ] }, - "execution_count": 7, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -458,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -476,7 +476,7 @@ " return global_session.with_default_session(\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", @@ -514,28 +514,28 @@ " \n", " \n", " 0\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 1\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 2\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 3\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 4\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "\n", @@ -544,23 +544,23 @@ ], "text/plain": [ " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... 
\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", "\n", " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T08:5... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... 
\n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 8, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -631,7 +631,7 @@ " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", " })\n", " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n", - " udf, axis=1, args=(*args)\n", + " udf, axis=1, args=args\n", " )\n", " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", "\n", @@ -654,7 +654,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 19, "metadata": { "id": "mRUGfcaFVW-3" }, @@ -677,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -691,97 +691,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", " return prop(*args, **kwargs)\n" ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a tin of K9Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The items are pet food or treats, specifically for rabbits or other small pets. The three varieties are: Timothy Hay Blend, Herbal Greens Mix, and Berry & Blossom Treat Blend. All three are by \"Fluffy Buns\".
3The item is a cat tree.
4The item is a bag of bird seed. Specifically, it is a bag of \"Chirpy Seed\" deluxe bird food.
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" - ], - "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a tin of K9Guard Dog Paw Balm. \n", - "1 The item is K9 Guard Dog Hot Spot Spray. \n", - "2 The items are pet food or treats, specifically... \n", - "3 The item is a cat tree. \n", - "4 The item is a bag of bird seed. Specifically, ... \n", - "\n", - " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -792,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "id": "IG3J3HsKhyBY" }, @@ -810,7 +726,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -924,7 +840,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1085,7 +1001,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1175,7 +1091,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1246,7 +1162,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1291,7 +1207,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1301,7 +1217,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1358,7 +1274,7 @@ }, { 
"cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1408,7 +1324,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1459,7 +1375,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [ { From 7a51704cde521871eb5ed284acd796a88e5f3910 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 20 Feb 2026 20:01:03 +0000 Subject: [PATCH 8/8] refactor: simplify udfs by removing requests.Session --- .../multimodal/multimodal_dataframe.ipynb | 332 +++++++++++------- 1 file changed, 202 insertions(+), 130 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index dd0f04c0d7..89af576711 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 11, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -186,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -204,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -253,23 +253,23 @@ " \n", " \n", " 0\n", - " \n", + " \n", " \n", " \n", " 1\n", - " \n", + " \n", " \n", " \n", " 2\n", - " \n", + " \n", " \n", " \n", " 3\n", - 
" \n", + " \n", " \n", " \n", " 4\n", - " \n", + " \n", " \n", " \n", "\n", @@ -278,16 +278,16 @@ ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2...\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 16, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -318,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 7, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -366,7 +366,7 @@ " \n", " \n", " 0\n", - " \n", + " \n", " alice\n", " image/png\n", " 1591240\n", @@ -374,7 +374,7 @@ " \n", " \n", " 1\n", - " \n", + " \n", " bob\n", " image/png\n", " 1182951\n", @@ -382,7 +382,7 @@ " \n", " \n", " 2\n", - " \n", + " \n", " bob\n", " image/png\n", " 1520884\n", @@ -390,7 +390,7 @@ " \n", " \n", " 3\n", - " \n", + " \n", " alice\n", " image/png\n", " 1235401\n", @@ -398,7 +398,7 @@ " \n", " \n", " 4\n", - " \n", + " \n", " bob\n", " image/png\n", " 1591923\n", @@ -411,11 +411,11 @@ ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... 
bob image/png \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", "\n", " size updated \n", "0 1591240 2025-03-20 17:45:04+00:00 \n", @@ -427,7 +427,7 @@ "[5 rows x 5 columns]" ] }, - "execution_count": 17, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -458,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -514,28 +514,28 @@ " \n", " \n", " 0\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 1\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 2\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 3\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " 4\n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "\n", @@ -544,23 +544,23 @@ ], "text/plain": [ " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", "\n", " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... 
\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:2... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 18, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -583,14 +583,11 @@ " import numpy as np\n", " import requests\n", " import base64\n", - " from requests import adapters\n", "\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", " src_obj = json.loads(src_rt)\n", " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", " \n", - " response = session.get(src_url, timeout=30)\n", + " response = requests.get(src_url, timeout=30)\n", " response.raise_for_status()\n", " \n", " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", @@ -609,7 +606,7 @@ " dst_obj = json.loads(dst_rt)\n", " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", " \n", - " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", + " requests.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", " \n", " uri = dst_obj[\"objectref\"][\"uri\"]\n", " return uri\n", @@ -654,7 +651,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 9, "metadata": { "id": "mRUGfcaFVW-3" }, @@ -677,7 +674,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -691,6 +688,12 @@ "name": 
"stderr", "output_type": "stream", "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", @@ -698,6 +701,84 @@ "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", " return prop(*args, **kwargs)\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 The item is a container of K9 Guard Dog Paw Balm. \n", + "1 The item is K9 Guard Dog Hot Spot Spray. \n", + "2 The image contains three bags of food, likely ... \n", + "3 The item is a cat tree.\\n \n", + "4 The item is a bag of bird seed. Specifically, ... \n", + "\n", + " image \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -708,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "id": "IG3J3HsKhyBY" }, @@ -726,7 +807,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -740,13 +821,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", @@ -782,28 +863,28 @@ " \n", " \n", " 0\n", - " The item is a K9 Guard Dog Paw Balm.\n", - " \n", + " The item is a container of Dog Paw Balm.\n", + " \n", " \n", " \n", " 1\n", - " The picture has multiple colors. The bottle itself is primarily white with teal accents, while other elements like the text and aloe vera illustration introduce green, black, and gray. The background is a neutral gray.\n", - " \n", + " The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.\n", + " \n", " \n", " \n", " 2\n", - " Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend**\\n* **Herbal Greeıs Mix Variety Blend**\\n* **Berry & Blossom Treat Blend**\n", - " \n", + " Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.\n", + " \n", " \n", " \n", " 3\n", - " Yes, the object in the image is a cat tree, which is a product designed for pets, specifically cats.\n", - " \n", + " Yes, it is for pets. 
It appears to be a cat tree or scratching post.\\n\n", + " \n", " \n", " \n", " 4\n", - " The weight of the product is 15 oz (257g).\n", - " \n", + " The image shows that the weight of the product is 15 oz/ 257g.\n", + " \n", " \n", " \n", "\n", @@ -812,23 +893,23 @@ ], "text/plain": [ " ml_generate_text_llm_result \\\n", - "0 The item is a K9 Guard Dog Paw Balm. \n", - "1 The picture has multiple colors. The bottle it... \n", + "0 The item is a container of Dog Paw Balm. \n", + "1 The picture contains many colors, including wh... \n", "2 Here are the product names from the image:\\n\\n... \n", - "3 Yes, the object in the image is a cat tree, wh... \n", - "4 The weight of the product is 15 oz (257g). \n", + "3 Yes, it is for pets. It appears to be a cat tr... \n", + "4 The image shows that the weight of the product... \n", "\n", " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", "\n", "[5 rows x 2 columns]" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -840,7 +921,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -858,13 +939,13 @@ "default model will be removed in BigFrames 3.0. 
Please supply an\n", "explicit model to avoid this message.\n", " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" @@ -905,39 +986,39 @@ " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", " \n", " \n", " 1\n", - " [ 0.00973689 0.02148374 0.00244311 ... 0.00...\n", + " [ 0.00973976 0.02148137 0.0024429 ... 
0.00...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", " \n", " \n", " 2\n", - " [ 0.01197349 0.02138474 0.05967783 ... -0.01...\n", + " [ 0.01195884 0.02139394 0.05968047 ... -0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", " \n", " \n", " 3\n", - " [-0.02621159 0.02797647 0.04416922 ... -0.01...\n", + " [-0.02621161 0.02797648 0.04416926 ... -0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", " \n", " \n", " 4\n", - " [ 0.05918641 0.01251377 0.01907265 ... 0.01...\n", + " [ 0.05918628 0.0125137 0.01907336 ... 0.01...\n", " \n", " <NA>\n", " <NA>\n", - " {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0...\n", + " {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\n", " \n", " \n", "\n", @@ -947,10 +1028,10 @@ "text/plain": [ " ml_generate_embedding_result \\\n", "0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n", - "1 [ 0.00973689 0.02148374 0.00244311 ... 0.00... \n", - "2 [ 0.01197349 0.02138474 0.05967783 ... -0.01... \n", - "3 [-0.02621159 0.02797647 0.04416922 ... -0.01... \n", - "4 [ 0.05918641 0.01251377 0.01907265 ... 0.01... \n", + "1 [ 0.00973976 0.02148137 0.0024429 ... 0.00... \n", + "2 [ 0.01195884 0.02139394 0.05968047 ... -0.01... \n", + "3 [-0.02621161 0.02797648 0.04416926 ... -0.01... \n", + "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", "0 \n", @@ -967,16 +1048,16 @@ "4 \n", "\n", " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... 
\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-20T09:0... \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", "\n", "[5 rows x 5 columns]" ] }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1001,7 +1082,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1030,12 +1111,9 @@ " import json\n", " from pypdf import PdfReader\n", " import requests\n", - " from requests import adapters\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", " src_obj_ref_rt_json = json.loads(src_obj_ref_rt)\n", " src_url = src_obj_ref_rt_json[\"access_urls\"][\"read_url\"]\n", - " response = session.get(src_url, timeout=30, stream=True)\n", + " response = requests.get(src_url, timeout=30, stream=True)\n", " response.raise_for_status()\n", " pdf_bytes = response.content\n", " pdf_file = io.BytesIO(pdf_bytes)\n", @@ -1060,12 +1138,9 @@ " import json\n", " from pypdf import PdfReader\n", " import requests\n", - " from requests import adapters\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", " src_obj_ref_rt_json = json.loads(src_obj_ref_rt)\n", " src_url = src_obj_ref_rt_json[\"access_urls\"][\"read_url\"]\n", - " response = session.get(src_url, timeout=30, stream=True)\n", + " response = requests.get(src_url, timeout=30, stream=True)\n", " response.raise_for_status()\n", " pdf_bytes = response.content\n", " pdf_file = io.BytesIO(pdf_bytes)\n", @@ -1091,7 +1166,7 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1140,7 +1215,7 @@ "[1 rows x 2 columns]" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1162,7 +1237,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1187,7 +1262,7 @@ "Name: chunked, dtype: string" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1207,7 +1282,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -1217,14 +1292,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" @@ -1240,7 +1315,7 @@ "Name: transcribed_content, dtype: string" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1274,7 +1349,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1287,7 +1362,7 @@ "Name: transcription_results, dtype: struct[pyarrow]" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1324,7 +1399,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 
20, "metadata": {}, "outputs": [ { @@ -1356,12 +1431,9 @@ " import json\n", " from PIL import ExifTags, Image\n", " import requests\n", - " from requests import adapters\n", - " session = requests.Session()\n", - " session.mount(\"https://\", adapters.HTTPAdapter(max_retries=3))\n", " src_obj_ref_rt_json = json.loads(src_obj_ref_rt)\n", " src_url = src_obj_ref_rt_json[\"access_urls\"][\"read_url\"]\n", - " response = session.get(src_url, timeout=30)\n", + " response = requests.get(src_url, timeout=30)\n", " bts = response.content\n", " image = Image.open(io.BytesIO(bts))\n", " exif_data = image.getexif()\n", @@ -1375,7 +1447,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1397,7 +1469,7 @@ "Name: blob_col, dtype: extension>[pyarrow]" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" }